zoukankan      html  css  js  c++  java
  • python网络编程学习笔记(一)

    python网络编程学习笔记(一)

    python网络编程基础,第四版
    pycharm实现,python版本2.7.5

    第一部分 底层网络

    一、笔记

    #coding=utf-8
    第一章
    
    import socket,sys
    port = 70  # Gopher port
    # Echo the command line so a wrong invocation is easy to spot.
    print (len(sys.argv))
    for arg in sys.argv:
        print (arg)
    host = sys.argv[1]
    filename = sys.argv[2]

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print ("old s is:", s)
    print (host)
    s.connect((host, port))
    print ("new s is:", s)

    # The request is the selector followed by a line terminator.  The escape
    # sequence had been turned into a physical line break inside the literal,
    # which is a syntax error; it is restored here as CRLF.
    s.sendall(filename + "\r\n")

    # Copy the reply to stdout until the server closes the connection
    # (recv() then returns an empty string).
    while True:
        buf = s.recv(2048)
        if not buf:
            break
        sys.stdout.write(buf)
    

    加入错误处理

    import socket,sys
    port = 70  # Gopher port
    # Echo the command line so a wrong invocation is easy to spot.
    print (len(sys.argv))
    for arg in sys.argv:
        print (arg)
    host = sys.argv[1]
    filename = sys.argv[2]

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print ("old s is:", s)
    print (host)

    try:
        s.connect((host, port))
    except socket.error as e:
        # socket.gaierror (name resolution failure) is a subclass of
        # socket.error, so this one handler covers both an unresolvable host
        # and a refused/unreachable connection.
        print ("ERROR connection to server:%s" % e)
        sys.exit(1)

    # Selector plus line terminator; the escape sequence had been lost from
    # the literal and is restored here as CRLF.
    s.sendall(filename + "\r\n")

    # Copy the reply to stdout until the server closes the connection.
    while True:
        buf = s.recv(2048)
        if not buf:
            break
        sys.stdout.write(buf)
    
    文件接口类重写
    import socket,sys
    port = 70  # Gopher port
    host = sys.argv[1]
    filename = sys.argv[2]

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((host, port))
    # Wrap the socket in an unbuffered (bufsize 0) file-like object so the
    # request is sent as soon as it is written.
    fd = s.makefile('rw', 0)
    # Selector plus line terminator; the escape sequence had been lost from
    # the literal and is restored here as CRLF.
    fd.write(filename + "\r\n")

    # readlines() returns once the server closes the connection.
    for line in fd.readlines():
        sys.stdout.write(line)
    

    基本服务器操作

    import socket
    
    host=''
    port=80
    
    s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    print "Server is running on port %d;press ctrl-c to
        terminate."% port
    
    while 1:
        clientsock,clientaddr=s.accept()
        clientfile=clientsock.makefile('rw',0)
        clientfile.write("welcome,"+str(clientaddr)+"/n")
        clientfile.write("Please enter a string:")
        line=clientfile.readline().strip()
        clientfile.write("You entered %d characters.
    "%len(line))
        clientfile.close()
        clientsock.close()
    

    高级接口

    import gopherlib,sys
    # Usage: <script> HOST SELECTOR
    # NOTE(review): gopherlib is a legacy module -- confirm it is available on
    # the interpreter in use.
    host = sys.argv[1]
    selector = sys.argv[2]

    # send_selector() returns a file-like object; dump everything it yields.
    reply = gopherlib.send_selector(selector, host)
    sys.stdout.writelines(reply.readlines())
    
    import urllib,sys
    # Usage: <script> HOST PATH -- fetch gopher://HOST PATH through urllib.
    host = sys.argv[1]
    path = sys.argv[2]

    reply = urllib.urlopen('gopher://%s%s' % (host, path))
    sys.stdout.writelines(reply.readlines())
    
    import urllib,sys
    # Fetch the URL given on the command line and stream it to stdout in
    # 2 KiB chunks; an empty read marks the end of the document.
    f = urllib.urlopen(sys.argv[1])
    chunk = f.read(2048)
    while chunk:
        sys.stdout.write(chunk)
        chunk = f.read(2048)
    

    第二章
    使用udp

    第三章 网络服务器

    import socket
    # Print, in sorted order, every name in the socket module that starts
    # with "SO".
    solist = sorted(name for name in dir(socket) if name.startswith('SO'))
    for name in solist:
        print(name)
    

    第四章 域名系统

    import sys,socket
    
    # Resolve the host named on the command line and show the socket address
    # (element 4) of the first getaddrinfo() entry.
    first_entry = socket.getaddrinfo(sys.argv[1], None)[0]
    print(first_entry[4])
    
    import sys,socket
    
    # Number and print the socket address of every getaddrinfo() entry.
    for counter, item in enumerate(socket.getaddrinfo(sys.argv[1], None)):
        print("%-2d:%s" % (counter, item[4]))
    
    P70
    import sys,socket
    # Same listing, restricted to SOCK_STREAM entries.
    stream_entries = socket.getaddrinfo(sys.argv[1], None, 0, socket.SOCK_STREAM)
    for counter, item in enumerate(stream_entries):
        print("%-2d:%s" % (counter, item[4]))
    

    执行反向查询

    import sys,socket
    
    try:
        result=socket.gethostbyaddr(sys.argv[1])
    
        print "Primary hostname:"
        print " "+result[0]
    
        print "
    Addresses:"
        for item in result[2]:
            print " "+item
    
    except socket.herror,e:
        print "Couldn't look up name:",e
    

    正反向查询

    import sys,socket
    def getipaddrs(hostname):
        """Return the address strings that *hostname* resolves to.

        Only SOCK_STREAM entries are requested; the address is the first
        element of each entry's socket-address tuple.
        """
        addresses = []
        for entry in socket.getaddrinfo(hostname, None, 0, socket.SOCK_STREAM):
            sockaddr = entry[4]
            addresses.append(sockaddr[0])
        return addresses
    def gethostname(ipaddr):
        """Return the primary host name reported by a reverse lookup of *ipaddr*."""
        primary_name, _aliases, _addresses = socket.gethostbyaddr(ipaddr)
        return primary_name
    
    try:
        hostname=gethostname(sys.argv[1])
        ipaddrs=getipaddrs(hostname)
    except socket.herror,e:
        print "NO host names available for %s;it may be normal"%sys.argv[1]
        sys.exit(0)
    except socket.gaierror,e:
        print "Got hostname %s,but it could not be forward-resolved:%s"%(hostname,str(e))
        sys.exit(1)
    
    if not sys.argv[1] in ipaddrs:
        print "GOt hostnae %s,but no forward lookup,"% hostname
        print "original IP %s did not appear in IP address list"% sys.argv[1]
        sys.exit(1)
    
    print "Validated hostname:",hostname
    

    获得完整域名:gethostname() 获得主机名,getfqdn() 获得完整域名,getaddrinfo() 获得该域名对应的 IP 地址。

    import sys,socket
    
    def getipaddrs(hostname):
        """Return the list of address strings that *hostname* resolves to (SOCK_STREAM entries only)."""
        entries = socket.getaddrinfo(hostname, None, 0, socket.SOCK_STREAM)
        # Each entry is (family, socktype, proto, canonname, sockaddr);
        # the address string is sockaddr[0].
        return [sockaddr[0] for _family, _socktype, _proto, _canonname, sockaddr in entries]
    
    hostname=socket.gethostname()
    print "Host name:",hostname
    
    print "Fully-qualified name:",socket.getfqdn(hostname)
    try:
        print "IP addresses:",",".join(getipaddrs(hostname))
    except socket.gaierror,e:
        print "Couldn't not get IP addresses:",e
    
    import sys,DNS
    query=sys.argv[1]
    DNS.DiscoverNameServers()
    
    reqobj=DNS.Request()
    
    answerobj=reqobj.req(name=query,qtrpe=DNS.Type.ANY)
    if not len(answerobj.answers):
        print "NOT found."
    for item in answerobj.answers:
        print "%-5s %s"%(item['typename'],item['data'])
    
    import sys,DNS
    
    def hierquery(qstring, qtype):
        """Look up *qtype* records for *qstring*, walking up the name hierarchy.

        If the full name yields no record of the requested type, the leftmost
        label is dropped and the query is retried, until an answer is found or
        only a single label remains.

        Returns the list of 'data' values of the matching answers, or None
        when nothing was found at any level.
        """
        while True:
            reqobj = DNS.Request()  # fresh request object per attempt
            try:
                answerobj = reqobj.req(name=qstring, qtype=qtype)
                answers = [x['data'] for x in answerobj.answers if x['type'] == qtype]
            except DNS.Base.DNSError:
                answers = []
            if answers:
                return answers

            pieces = qstring.split(".", 1)
            if len(pieces) == 1:
                # No dot left: nothing more to strip.
                return None
            qstring = pieces[1]
    
    
    def findnameservers(hostname):
        """Return the NS records found for *hostname* or its nearest parent domain (None if none)."""
        return hierquery(qstring=hostname, qtype=DNS.Type.NS)
    
    def getrecordsfromnameserver(qstring, qtype, nslist):
        """Query the servers in *nslist* one by one until one of them answers.

        Returns the first non-empty answer list, or an empty list when every
        server failed or had no answer (including when *nslist* is empty).
        """
        for ns in nslist:
            reqobj = DNS.Request(server=ns)
            try:
                answers = reqobj.req(name=qstring, qtype=qtype).answers
            except DNS.Base.DNSError:
                # This server failed; move on to the next one.
                continue
            if answers:
                return answers
        # Only give up after the whole list has been tried.  The return used
        # to sit inside the loop, so just the first server was ever consulted
        # and an empty list produced None instead of [].
        return []
    
    def nslookup(qstring,qtype,verbose=1):
        nslist=findnameservers(qstring)
        if nslist==None:
            raise RuntimeError,"Could not find nameserver to use."
        if verbose:
            print "using nameserver:",",".join(nslist)
        return getrecordsfromnameserver(qstring,qtype,nslist)
    
    if __name__ == '__main__':
        # Usage: <script> NAME -- print every record found for NAME.
        query = sys.argv[1]
        DNS.DiscoverNameServers()

        answers = nslookup(query, DNS.Type.ANY)
        if len(answers) == 0:
            print("not found.")
        for item in answers:
            print("%-5s %s" % (item['typename'], item['data']))
    

    第五章
    超时的用法
    echoserver.py

    import socket,traceback
    
    host = ''
    port = 51432
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    while True:
        try:
            clientsock,clientaddr = s.accept()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
            continue
    
        try:
            print "Got connection from",clientsock.getpeername()
            while  True:
                data = clientsock.recv(4096)
                if not len(data):
                    break
                clientsock.sendall(data)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    
        try:
            clientsock.close()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
    
    
    import struct,sys
    
    def htones(num):
        """Pack *num* as a 16-bit unsigned integer in network byte order."""
        return struct.pack('!H', num)

    # Conventional spelling; ``htones`` is kept so existing callers still work.
    htons = htones

    def htonl(num):
        """Pack *num* as a 32-bit unsigned integer in network byte order."""
        return struct.pack('!I', num)

    def ntohs(data):
        """Unpack a 2-byte network-order string into an integer."""
        # Was ``struct.unoack``, which raised AttributeError on every call.
        return struct.unpack('!H', data)[0]

    def ntohl(data):
        """Unpack a 4-byte network-order string into an integer."""
        return struct.unpack('!I', data)[0]

    def sendstring(data):
        """Return *data* prefixed with its length as a 32-bit network-order integer."""
        return htonl(len(data)) + data
    
    # Read one line from stdin and show its length-prefixed encoding.
    print("Enter a string:")
    text = sys.stdin.readline().rstrip()

    print(repr(sendstring(text)))
    
    
    import socket,sys
    
    host,port = sys.argv[1:]
    
    results = socket.getaddrinfo(host,port,0,socket.SOCK_STREAM)
    
    for result in results:
        print "-"*60
    
        if result[0] == socket.AF_INET:
            print "Family: AF_INET"
        elif result[0] == socket.AF_INET6:
            print "Family: AF_INET6"
        else:
            print "Family:",result[0]
    
        if result[1] == socket.SOCK_STREAM:
            print "Socket Type: SOCK_STREAM"
        elif result[1] == socket.SOCK_DGRAM:
            print "Socket Type: SOCK_DGRAM"
    
        print "Protocol:",result[2]
        print "Canonical Name:",result[3]
        print "Socket Address:",result[4]
    

    先找 IPv4,再找 IPv6
    Connect Example with ipv6 Awareness ------------- ipv6connect.py

    import socket,sys
    
    def getaddrinfo_pref(host, port, socktype, familypreference=socket.AF_INET):
        """Return one getaddrinfo() entry for (*host*, *port*), preferring a family.

        The first entry whose address family equals *familypreference*
        (IPv4 by default) wins; if no entry matches, the first entry of any
        family is returned.
        """
        candidates = socket.getaddrinfo(host, port, 0, socktype)
        preferred = [entry for entry in candidates if entry[0] == familypreference]
        if preferred:
            return preferred[0]
        return candidates[0]
    
    host = sys.argv[1]
    port = 'http'
    
    c = getaddrinfo_pref(host,port,socket.SOCK_STREAM)
    print "Connecting to",c[4]
    
    s = socket.socket(c[0],c[1])
    s.connect(c[4])
    s.sendall("HEAD / HTTP/1.0
    
    ")
    
    while True:
        buf = s.recv(4096)
    
        if not len(buf):
            break
        sys.stdout.write(buf)
    
    
    Echo Server Bound to Specific Address
    bindserver.py
    
    import socket,traceback
    
    host = '127.0.0.1'
    port = 51423
    
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    
    while True:
        clientsock,clientaddr = s.accept()
    
        print "Got connection from",clientsock.getpeername()
    
        while True:
            data = clientsock.recv(4096)
            if not len(data):
                break
            clientsock.sendall(data)
    
        clientsock.close()
    
    poll()
    
    import socket,sys,select
    
    
    port = 51423
    host = 'localhost'

    # Spinner state: bar width, current marker position, direction (+1 / -1).
    spinsize = 10
    spinpos = 0
    spindir = 1

    def spin():
        """Draw one frame of a bouncing '|' marker and advance its position."""
        global spinsize, spinpos, spindir

        spinstr = '.' * spinpos + '|' + '.' * (spinsize - spinpos - 1)
        # Carriage return so each frame overwrites the previous one.  The
        # escape had been turned into a physical line break inside the
        # literal (a syntax error); restored here.
        sys.stdout.write('\r' + spinstr + ' ')
        sys.stdout.flush()

        spinpos += spindir

        # Bounce off either end of the bar.
        if spinpos < 0:
            spindir = 1
            spinpos = 1
        elif spinpos >= spinsize:
            spinpos -= 2
            spindir = -1
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.connect((host,port))
    
    p = select.poll()
    
    p.register(s.fileno(),select.POLLIN | select.POLLERR | select.POLLHUP)
    
    while True:
        results = p.poll(50)
    
        if len(results):
            if results[0][1] == select.POLLIN:
                data = s.recv(4096)
                if not len(data):
                    print ("
    Remove end closed connection ; exiting.")
                    break
                sys.stdout.write("
    Received: " + data)
                sys.stdout.flush()
    
            else:
                print "
    Problem occurred exitng."
                sys.exit(0)
        spin()
    
    
    selectclient.py
    
    import socket,sys,select
    
    port = 51423
    host = 'localhost'

    # Spinner state: bar width, current marker position, direction (+1 / -1).
    spinsize = 10
    spinpos = 0
    spindir = 1

    def spin():
        """Draw one frame of a bouncing '|' marker and advance its position."""
        global spinsize, spinpos, spindir

        spinstr = '.' * spinpos + '|' + '.' * (spinsize - spinpos - 1)
        # Carriage return so each frame overwrites the previous one.  The
        # escape had been turned into a physical line break inside the
        # literal (a syntax error); restored here.
        sys.stdout.write('\r' + spinstr + ' ')
        sys.stdout.flush()

        spinpos += spindir

        # Bounce off either end of the bar.
        if spinpos < 0:
            spindir = 1
            spinpos = 1
        elif spinpos >= spinsize:
            spinpos -= 2
            spindir = -1
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.connect((host,port))
    
    while True:
        infds,outfds,errfds = select.select([s],[],[s],0.05)
    
        if len(infds):
    
            data = s.recv(4096)
            if not len(data):
                print("
    Remote end closed connection; Exiting.")
                break
            sys.stdout.write("
    Received: " + data)
            sys.stdout.flush()
    
        if len(errfds):
            print "
    Problen occurred; exiting."
            sys.exit(0)
        spin()
    

    二、TCP通信方式

    服务端:

    #coding=utf-8
    from socket import*
    
    # Server side: accept TCP connections and answer every message received.
    myHost = ''      # empty host: accept connections on every local interface
    myPort = 50007

    sockobj = socket(AF_INET, SOCK_STREAM)   # TCP socket object
    sockobj.bind((myHost, myPort))           # bind it to the server port
    sockobj.listen(5)                        # allow up to 5 pending connections

    while True:                              # serve until the process is killed
        connection, address = sockobj.accept()   # wait for the next client
        print('Server connected by:',address)    # the connection is a new socket
        # Read until the client closes its side (recv() returns an empty
        # bytes object) and send each message back with a prefix.
        for data in iter(lambda: connection.recv(1024), b''):
            connection.send(b'Echo get your message:' + data)  # must be bytes
        connection.close()
    

    客户端:

    #coding=utf-8
    import socket
    
    #客户端
    import sys
    from socket import *
    # Defaults; argv[1] overrides the host, argv[2:] override the messages.
    serverHost = 'localhost'
    serverPort = 50007

    message = [b'hello network world']

    if len(sys.argv) > 1:
        serverHost = sys.argv[1]
    if len(sys.argv) > 2:
        # Command-line text is encoded to bytes before being sent.
        message = (x.encode() for x in sys.argv[2:])

    sockobj = socket(AF_INET, SOCK_STREAM)
    sockobj.connect((serverHost, serverPort))

    # Send each message and print the reply (at most 1024 bytes per read).
    for line in message:
        sockobj.send(line)
        reply = sockobj.recv(1024)
        print('Client received:',reply)

    sockobj.close()
    

    三、UDP通信方式

    UDP请求端:

    #coding=utf-8
    import socket,sys
    
    host=sys.argv[1]
    textpost=sys.argv[2]
    
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    try:
        port=int(textpost)
    except ValueError:
        print "输入错入"
        port=socket.getservbyname(textpost,'udp')
    
    s.connect((host,port))
    print "Enter data to transmit: "
    data=sys.stdin.readline().strip()
    s.sendall(data)
    print "Looking for replies."
    while(1):
        buf=s.recv(2048)
        if not len(buf):
            break
        sys.stdout.write(buf)
    UDP应答端:
    #coding=utf-8
    import socket,traceback
    
    host=''
    port=54132
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    
    while 1:
        try:
            message,address=s.recvfrom(8192)
            print "Got data from",address
            s.sendto(message,address)
        except(KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    UDP查询时间
    服务端:
    #coding=utf-8
    import socket,traceback,time,struct
    
    host=''
    port=51432
    
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    
    while 1:
        try:
            message,address=s.recvfrom(8192)
            print message,address
            secs=int(time.time())
            secs-=60*60*24
            secs+=220898800
            reply=struct.pack("!I",secs)
            s.sendto(reply,address)
        except(KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    

    客户端:

    #coding=utf-8
    import socket,sys,struct,time
    hostname='localhost'
    port=51432
    
    host=socket.gethostbyname(hostname)
    s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.sendto('',(host,port))
    
    print "Loking for replies"
    buf=s.recvfrom(2048)[0]
    if len(buf)!=4:
        print "Wrong-size reply %d:%s"%(len(buf),buf)
        sys.exit(1)
    
    secs=struct.unpack("!I",buf)[0]
    secs-=220898800
    print time.ctime(int(secs))
    超时:
    #coding=utf-8
    import socket,traceback
    
    host = ''
    port = 51432
    
    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.bind((host,port))
    s.listen(1)
    
    while True:
        try:
            clientsock,clientaddr = s.accept()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
            continue
    
        try:
            print "Got connection from",clientsock.getpeername()
            while  True:
                data = clientsock.recv(4096)
                if not len(data):
                    break
                clientsock.sendall(data)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    
        try:
            clientsock.close()
        except KeyboardInterrupt:
            raise
        except:
            traceback.print_exc()
    四、ftp
    #coding=utf-8
    #自动抓取并打开远程文件文件
    import os,sys
    from getpass import getpass
    from ftplib import FTP
    
    nonpassive = False            # set True to switch off passive mode
    filename = 'monkeys.jpg'      # default file to fetch
    dirname = '.'                 # remote directory
    sitename = 'ftp.rmi.net'
    userinfo = ('lutz', getpass('pwd?'))   # password is prompted for, not stored
    if len(sys.argv) > 1:
        filename = sys.argv[1]

    print('Connection...')
    connection = FTP(sitename)
    connection.login(*userinfo)
    connection.cwd(dirname)
    if nonpassive:
        connection.set_pasv(False)

    print('Downloading...')
    # The FTP command is "RETR <name>"; the separating space was missing, so
    # the server received e.g. "RETRmonkeys.jpg".  The with-block also makes
    # sure the local file is closed if the transfer fails.
    with open(filename, 'wb') as localfile:
        connection.retrbinary('RETR ' + filename, localfile.write, 1024)
    connection.quit()

    if input('Open file?') in ['Y', 'y']:
        from PP4E.System.Media.playfile import playfile
        playfile(filename)
    

    五、广播

    接收端:

    #coding=utf-8
    import socket,traceback
    
    host = ''
    port = 51423
    
    s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
    s.bind((host,port))
    
    while True:
        try:
            message,address =s.recvfrom(8192)
            print "Got data from ",address
            s.sendto("I am here",address)
        except (KeyboardInterrupt,SystemExit):
            raise
        except:
            traceback.print_exc()
    

    发送端:

    #coding=utf-8
    import socket,sys
    dest = ('<broadcast>',51423)
    
    s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
    s.sendto("Hello",dest)
    
    print "Looking for replies; press Ctrl-C to stop."
    
    while True:
        (buf,address) = s.recvfrom(2048)
        if not len(buf):
            break
        print "Received from %s: %s" % (address,buf)
    

    第二部分web Service

    一、web客户端访问

    urllib2扩展性更好
    1.下载Web界面
    2.在远程HTTP服务器上验证
    3.提交表单(form)数据
    4.处理错误
    5.与非HTTP协议通信

    1.下载Web界面
    (1)

    #coding=utf-8
    import sys,urllib2
    
    # Download the URL given on the command line and copy it to stdout in
    # 1 KiB chunks; an empty read marks the end of the document.
    req = urllib2.Request(sys.argv[1])
    fd = urllib2.urlopen(req)
    chunk = fd.read(1024)
    while chunk:
        sys.stdout.write(chunk)
        chunk = fd.read(1024)
    

    sys.stdout 是标准输出文件。write就是往这个文件写数据。
    合起来就是打印数据到标准输出。类似print

    运行结果:

    D:\python\python.exe E:/code/python/unit6/dump_page.py
    http://www.example.com

    <!doctype html>
    <html>
    <head>
        <title>Example Domain</title>
    
        <meta charset="utf-8" />
        <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <style type="text/css">
        body {
            background-color: #f0f0f2;
            margin: 0;
            padding: 0;
            font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    
        }
        div {
            width: 600px;
            margin: 5em auto;
            padding: 50px;
            background-color: #fff;
            border-radius: 1em;
        }
        a:link, a:visited {
            color: #38488f;
            text-decoration: none;
        }
        @media (max-width: 700px) {
            body {
                background-color: #fff;
            }
            div {
                width: auto;
                margin: 0 auto;
                border-radius: 0;
                padding: 1em;
            }
        }
        </style>
    </head>
    
    <body>
    <div>
        <h1>Example Domain</h1>
        <p>This domain is established to be used for illustrative examples in documents. You may use this
        domain in examples without prior coordination or asking for permission.</p>
        <p><a href="http://www.iana.org/domains/example">More information...</a></p>
    </div>
    </body>
    </html>
    

    Process finished with exit code 0

    (2)

    #coding=utf-8
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    fd=urllib2.urlopen(req)
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"%(key,value)
    

    运行结果如下:
    D:\python\python.exe E:/code/python/unit6/dump_info.py http://httpd.apache.org/dev
    Retrieved http://httpd.apache.org/dev/
    content-length=8870
    accept-ranges=bytes
    vary=Accept-Encoding
    server=Apache/2.4.7 (Ubuntu)
    last-modified=Wed, 25 Jan 2017 14:38:55 GMT
    connection=close
    etag="22a6-546ec313cb061"
    date=Fri, 17 Mar 2017 06:29:52 GMT
    content-type=text/html

    Process finished with exit code 0

    注:从geturl()得到的值与传入Request的对象不同,结尾处多了一条斜线,远程服务器做了一个Http转向,urllib自动跟随了转向。
    其他行显示Http的header信息;

    2.在远程HTTP服务器上验证

    (1)

    #coding=utf-8
    import sys,urllib2,getpass
    
    class TerminalPassword(urllib2.HTTPPasswordMgr):
        """Password manager that asks on the terminal when it has no stored credentials."""

        def find_user_password(self, realm, authuri):
            """Return (username, password) for *realm* at *authuri*.

            Credentials already registered with the manager are returned
            as-is; otherwise the user is prompted on stdin (the password is
            read with getpass).
            """
            ret = urllib2.HTTPPasswordMgr.find_user_password(self, realm, authuri)

            if ret[0] is None and ret[1] is None:
                # The newline escape had lost its backslash ("%sn"), which
                # printed a stray "n" and ran the prompt onto the same line.
                sys.stdout.write("Login reauired for %s at %s\n" % (realm, authuri))
                sys.stdout.write("Username: ")
                username = sys.stdin.readline().rstrip()
                password = getpass.getpass().rstrip()
                return (username, password)
            else:
                return ret
    # Fetch the URL through an opener that supports HTTP basic
    # authentication, using TerminalPassword as its password manager.
    auth_handler = urllib2.HTTPBasicAuthHandler(TerminalPassword())
    opener = urllib2.build_opener(auth_handler)
    response = opener.open(urllib2.Request(sys.argv[1]))
    print(response.read())
    

    扩展 urllib2.HTTPPasswordMgr 类,允许程序在需要的时候向操作员询问用户名和密码;
    build_opener:允许指定额外的处理程序,代码需要支持认证,所以HTTPBasicAuthHandler加到处理链接

    3.提交表单(form)数据
    GET方法:把表单数据编码至url,在给出请求的页面后,加一个问号,接着是表单的元素。每个键和值对用“&”分割,有些字符需要被避免。不适合数据量比较大的地方。
    (1)

    代码:
    #coding=utf-8
    import sys,urllib2
    
    # Download the URL given on the command line and copy it to stdout in
    # 1 KiB chunks; an empty read marks the end of the document.
    req = urllib2.Request(sys.argv[1])
    fd = urllib2.urlopen(req)
    chunk = fd.read(1024)
    while chunk:
        sys.stdout.write(chunk)
        chunk = fd.read(1024)
    

    sys.stdout 是标准输出文件。write就是往这个文件写数据。
    合起来就是打印数据到标准输出。类似print

    运行结果:
    D:\python\python.exe E:/code/python/unit6/dump_page.py http://weixin.sogou.com/weixin?p=01030402&query=博客园&type=2&ie=utf8

    <!doctype html>

    注:在命令行中必须给 url 加上引号(否则其中的 & 会被 shell 解释)。

    (2)
    代码:
    #coding=utf-8
    import sys,urllib2,urllib
    
    def addGETdata(url, data):
        """Return *url* with *data* URL-encoded and appended after a '?'."""
        query_string = urllib.urlencode(data)
        return '?'.join((url, query_string))
    
    zipcode=sys.argv[1]
    url=addGETdata('http://www.weather.com.cn/cgi-bin/findweather/getForecast',[('query',zipcode)])
    
    print "using URL",url
    req=urllib2.Request(url)
    fd=urllib2.urlopen(req)
    while 1:
        data=fd.read(1024)
        if not len(data):
            break
        sys.stdout.write(data)
    

    注:函数addGETdata(url,data)负责在url结尾添加所有的数据。在内部,他在URL和通过urllib.urlencode()得到的数据间添加问号。

    POST方法:单独部分发送。URL永远不会被修改,附加信息通过第二个参数传递给urlopen().
    (3)
    代码:

    #coding=utf-8
    import sys,urllib2,urllib
    
    # Submit the command-line argument as the "query" field of a POST form:
    # the URL stays untouched and the encoded fields travel as the second
    # argument of urlopen().
    zipcode = sys.argv[1]
    url = 'http://www.wunderground.com/cgi-bin/findweather/getForcecast'
    form_body = urllib.urlencode([('query', zipcode)])
    fd = urllib2.urlopen(urllib2.Request(url), form_body)
    chunk = fd.read(1024)
    while chunk:
        sys.stdout.write(chunk)
        chunk = fd.read(1024)
    

    4.处理错误

    (1)
    代码:

    #coding=utf-8
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.URLError,e:
        print "Error reteiveving data:",e
        sys.exit(1)
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"% (key,value)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit6/error_basic.py
    https://www.wunderground.com/cgi-bin/findweather/getForcecast
    Error reteiveving data: HTTP Error 404: Not Found

    Process finished with exit code 1

    (2)
    代码:

    #coding=utf-8
    # import sys,urllib2
    #
    # req=urllib2.Request(sys.argv[1])
    #
    # try:
    #     fd=urllib2.urlopen(req)
    # except urllib2.URLError,e:
    #     print "Error reteiveving data:",e
    #     sys.exit(1)
    # print "Retrieved",fd.geturl()
    # info=fd.info()
    # for key,value in info.items():
    #     print "%s=%s"% (key,value)
    
    import sys,urllib2
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.HTTPError,e:
        print "Error reteiveving data:",e
        print "Server error document follows:
    "
        print e.read
        sys.exit(1)
    except urllib2.URLError,e:
        print "Error retriveving data",e
        sys.exit(2)
    
    print "Retrieved",fd.geturl()
    info=fd.info()
    for key,value in info.items():
        print "%s=%s"% (key,value)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit6/error_basic.py
    https://www.wunderground.com/cgi-bin/findweather/getForcecast
    Error reteiveving data: HTTP Error 404: Not Found
    Server error document follows:

    <bound method _fileobject.read of <socket._fileobject object at
    0x0216A5B0>>

    Process finished with exit code 1

    注:如果产生的是 HTTPError 的实例,会捕获该异常并打印细节(包括服务器返回的错误文档);否则,如果是 urllib2.URLError 类的实例,只显示一条 URLError 信息。

    读取数据错误:
    通信错误,会使socket模块调用read()函数时发生socket.error;(会通过系统层传递)
    没有通信情况下发送的文档被删节;

    (3)
    代码:

    #coding=utf-8
    import sys,urllib2,socket
    
    req=urllib2.Request(sys.argv[1])
    
    try:
        fd=urllib2.urlopen(req)
    except urllib2.HTTPError,e:
        print "Error retrieving data:",e
        print "Sever error document follows:
    "
        print e.read()
        sys.exit(1)
    except urllib2.URLError,e:
        print "Error retrieving data:",e
        sys.exit(2)
    
    print "Retrieved",fd.geturl()
    
    bytesread=0
    
    while 1:
        try:
            data=fd.read(1024)
        except socket.error,e:
            print "Error reading data:",e
            sys.exit(3)
    
        if not len(data):
            break
        bytesread+=len(data)
        sys.stdout.write(data)
    
        if fd.info().has_key('Content-Length') and long(fd.info()['Content-Length'])!=long(bytesread):
            print "Excepted a document of size %d,but read %d bytes"%(long(fd.info()['Content-Length']),bytesread)
            sys.exit(4)
    

    运行结果:

    
    > D:\python\python.exe E:/code/python/unit6/erroe_all.py
    > https://www.wunderground.com/cgi-bin/findweather/getForcecast
    > Error retrieving data: HTTP Error 404: Not Found
    > Sever error document follows:
    > 
    > 
    > <!DOCTYPE html>
    > <!--[if IE 9]><html class="no-js ie9"> <![endif]-->
    > <!--[if gt IE 9]><!--> <html class="no-js "> <!--<![endif]-->
    > 	<head>
    > 		<title>Error | Weather Underground</title>
    > 		<link href="//icons.wxug.com/" rel="dns-prefetch" />
    > 		<link href="//api-ak.wunderground.com/" rel="dns-prefetch" />
    > <meta charset="utf-8">
    > <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    
    

    二、解析html和xhtml

    第七章 解析Html 和XHtml p151-p168
    1.提取标题
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    import sys
    
    class TitleParser(HTMLParser):
        """HTML parser that collects the text between <title> and </title>."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.title = ''          # text collected so far
            self.readingtitle = 0    # 1 while inside a <title> element

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag != 'title':
                return
            self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title, but only while inside <title>."""
            if not self.readingtitle:
                return
            self.title += data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag != 'title':
                return
            self.readingtitle = 0

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    fd = open(sys.argv[1])
    tp = TitleParser()
    tp.feed(fd.read())
    print "Title is:",tp.gettitle()
    

    运行结果:

    D:\python\python.exe E:/code/python/unit7/basic_title.py
    E:/code/python/unit7/faqs.html
    Title is: Appendix?B. MySQL 5.6 Frequently Asked Questions

    Process finished with exit code 0

    注:从表中摘取数据,或

    2.改进
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys
    
    class TitleParser(HTMLParser):
        """HTML parser that collects the <title> text, expanding named entities."""

        def __init__(self):
            self.title = ''          # text collected so far
            self.readingtitle = 0    # 1 while inside a <title> element
            HTMLParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag == 'title':
                self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title, but only while inside <title>."""
            if self.readingtitle:
                self.title += data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag == 'title':
                self.readingtitle = 0

        def handle_entityref(self, name):
            """Feed a named entity (&name;) through handle_data.

            Known entities are replaced by their definition; unknown ones
            are passed on in their literal "&name;" form.
            """
            # ``in`` replaces dict.has_key(), which is deprecated and no
            # longer exists on Python 3 dictionaries.
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&' + name + ';')

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    fd = open(sys.argv[1])
    tp = TitleParser()
    tp.feed(fd.read())
    print "Title is:",tp.gettitle()
    

    etitle.html

    <!DOCTYPE html>
    <html >
    <head>
        <title>Document Title &amp;Intro</title>
    </head>
    <body>
    this is my text.
    </body>
    </html>
    

    运行结果一:

    D:\python\python.exe E:/code/python/unit7/basic_title.py
    E:/code/python/unit7/etitle.html
    Title is: Document Title Intro

    Process finished with exit code 0
    运行结果二:

    D:\python\python.exe E:/code/python/unit7/etitle.py
    E:/code/python/unit7/etitle.html
    Title is: Document Title &Intro

    Process finished with exit code 0

    当一个实体出现时,代码检查该实体是否可以识别:可以识别,则转换为相应的值;否则按输入流中的原始文字输出。

    3.转换字符参考
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys
    
    class TitleParser(HTMLParser):
        """HTML parser that collects the <title> text, expanding entities and character references."""

        def __init__(self):
            self.title = ''          # text collected so far
            self.readingtitle = 0    # 1 while inside a <title> element
            HTMLParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            """Start collecting when a <title> tag opens."""
            if tag == 'title':
                self.readingtitle = 1

        def handle_data(self, data):
            """Append text to the title, but only while inside <title>."""
            if self.readingtitle:
                self.title += data

        def handle_endtag(self, tag):
            """Stop collecting when the <title> tag closes."""
            if tag == 'title':
                self.readingtitle = 0

        def handle_entityref(self, name):
            """Feed a named entity (&name;) through handle_data.

            Known entities are replaced by their definition; unknown ones
            are passed on in their literal "&name;" form.
            """
            # ``in`` replaces dict.has_key(), which is deprecated and no
            # longer exists on Python 3 dictionaries.
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&' + name + ';')

        def handle_charref(self, name):
            """Feed a numeric character reference (&#NNN; or &#xHH;) through handle_data.

            References that are not valid numbers, or that fall outside
            1..255, are silently dropped.
            """
            try:
                if name[:1] in ('x', 'X'):
                    # Hexadecimal form; previously always discarded.
                    charnum = int(name[1:], 16)
                else:
                    charnum = int(name)
            except ValueError:
                return
            # Upper bound is 255, the largest single-byte code; it had been
            # mistyped as 225, which dropped codes 226-255.
            if charnum < 1 or charnum > 255:
                return
            self.handle_data(chr(charnum))

        def gettitle(self):
            """Return the title text collected so far."""
            return self.title
    
    # Parse the HTML file named on the command line and report its title.
    tp = TitleParser()
    # The with-block closes the file even if parsing raises.
    with open(sys.argv[1]) as fd:
        tp.feed(fd.read())
    print("Title is: %s" % tp.gettitle())
    

    4.处理不均衡的标签
    代码:

    #coding=utf-8
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    import sys,re
    
    class TitleParser(HTMLParser):
        """Report the title and list items of a page, tolerating unclosed tags.

        A stack of open tags (``taglevels``) is kept. Closing a tag also closes
        every tag opened after it, and a start tag that repeats the innermost
        open tag (``<li>one<li>two``) implicitly closes the earlier one.
        """

        def __init__(self):
            self.taglevels = []
            self.handledtags = ['title', 'ul', 'li']
            self.processing = None
            # Initialised here so cleanse() and gettitle() are safe to call
            # before any handled tag has been seen.
            self.data = ''
            self.title = ''
            HTMLParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            """Push *tag* on the stack and start collecting if it is handled."""
            # Same tag opened twice in a row: the first was never closed.
            if self.taglevels and self.taglevels[-1] == tag:
                self.handle_endtag(tag)

            self.taglevels.append(tag)
            if tag in self.handledtags:
                self.data = ''
                self.processing = tag
                if tag == 'ul':
                    print("List start")

        def handle_data(self, data):
            """Accumulate text while a handled tag is being processed."""
            if self.processing:
                self.data += data

        def handle_endtag(self, tag):
            """Close *tag* and every tag that was opened after it."""
            if tag not in self.taglevels:
                # Stray end tag with no matching start tag: ignore it.
                return

            while self.taglevels:
                starttag = self.taglevels.pop()

                if starttag in self.handledtags:
                    self.finishprocessing(starttag)

                if starttag == tag:
                    break

        def cleanse(self):
            """Collapse every run of whitespace in the collected text."""
            # Raw string with a real backslash: the pattern 's+' would replace
            # runs of the letter "s" instead of whitespace.
            self.data = re.sub(r'\s+', ' ', self.data)

        def finishprocessing(self, tag):
            """Print the text collected for *tag* and stop collecting."""
            self.cleanse()
            if tag == 'title' and tag == self.processing:
                # Remember the title so gettitle() has something to return.
                self.title = self.data
                print("Dom title %s" % self.data)
            elif tag == 'ul':
                print("List ended")
            elif tag == 'li' and tag == self.processing:
                print("List item %s" % self.data)
            self.processing = None

        def gettitle(self):
            """Return the cleaned title text, or '' if no title was seen."""
            return self.title
    

    处理特殊值,如果在映射表中有对应的,即采用映射的值,否则为字面值

     def handle_entityref(self, name):
            """Replace a known named entity by its value, else keep ``&name;``."""
            # ``in`` replaces the deprecated, Python-2-only dict.has_key().
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&' + name + ';')
    
        def handle_charref(self, name):
            """Feed the character for a decimal reference in 1..255 to handle_data.

            Anything that is not a decimal integer, or is outside that range,
            is dropped without output.
            """
            try:
                code = int(name)
            except ValueError:
                # Not a decimal number, nothing to emit.
                return
            if 1 <= code <= 255:
                self.handle_data(chr(code))
    
    # Parse the HTML file named on the command line; TitleParser prints as it goes.
    tp = TitleParser()
    # The with-block closes the file even if parsing raises.
    with open(sys.argv[1]) as fd:
        tp.feed(fd.read())
    

    运行结果:

    D:\python\python.exe E:/code/python/unit7/4un.py
    E:/code/python/unit7/4un.html
    Dom title DOCTYPE Title & Intro?
    List start
    List item First List item
    List item second list item
    List item second list item
    List ended

    Process finished with exit code 0

    5.一个可以实际工作的例子

    三、XML和XML-RPC

    P169-p190
    处理XML文档有两种方式:基于树(tree)和基于事件(event)。基于事件的解析器顺序扫描文档,每遇到一个元素就触发相应的事件,由事件处理函数作出响应。
    8.2 使用Dom
    代码:

    #coding=utf-8
    from xml.dom import minidom,Node
    
    def scanNode(node,level=0):
        """Print the class name of *node* and, recursively, of its descendants.

        Each nesting level is indented by four more spaces; element nodes
        also show their tag name.

        node  -- any xml.dom node (typically the Document from minidom.parse)
        level -- current nesting depth, 0 for the node the caller passes in
        """
        msg = node.__class__.__name__
        if node.nodeType == Node.ELEMENT_NODE:
            msg += ",tag" + node.tagName
        print("%s %s" % (" " * level * 4, msg))
        # hasChildNodes is a method; without the call the bound method
        # object is always truthy and the test is meaningless.
        if node.hasChildNodes():
            for child in node.childNodes:
                scanNode(child, level + 1)
    
    
    # Parse Sample.xml (path is relative to the working directory) and print its node tree.
    doc = minidom.parse("Sample.xml")
    scanNode(doc)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un1.py

     Document
    >      Element,tagbook
    >          Text
    >          Element,tagtitle
    >              Text
    >          Text
    >          Element,tagauthor
    >              Text
    >              Element,tagname
    >                  Text
    >                  Element,tagfirst
    >                      Text
    >                  Text
    >                  Element,taglast
    >                      Text
    >                  Text
    >              Text
    >              Element,tagaffiliation
    >                  Text
    >              Text
    >          Text
    >          Element,tagchapter
    >              Text
    >              Element,tagtitle
    >                  Text
    >              Text
    >              Element,tagpara
    >                  Text
    >                  Element,tagcompany
    >                      Text
    >                  Text
    >              Text
    >          Text
    

    Process finished with exit code 0

    sample.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <book>
        <title> Sample XML Thing </title>
        <author>
            <name>
                <first>Benjamin</first>
                <last>Smith</last>
            </name>
            <affiliation>Springy Widgets,Inc.</affiliation>
        </author>
    
        <chapter number = "1">
            <title>First chapter</title>
            <para>
                I think widgets are great.you should buy lots
                of them from <company>Springy widgets,Inc</company>
            </para>
        </chapter>
    </book>
    

    2.使用dom完全解析
    代码:

    #coding=utf-8
    """
    将XML以文本形式重新格式化输出
    1.使用Node的节点类型,判断下一步如何处理
    2.对不同的节点名(tagName)进行相应的处理
    """
    from xml.dom import minidom, Node
    import re, textwrap
    
    
    class SampleScanner:
        """Walk a parsed <book> document and print it as formatted plain text.

        Constructing an instance does all the work: the constructor looks for
        the top-level <book> element and prints its title, author and chapters.
        """

        def __init__(self, doc):
            for child in doc.childNodes:
                # Only the <book> element is processed.
                if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                    self.handleBook(child)

        def gettext(self, nodelist):
            """Return the text contained in *nodelist*, whitespace collapsed.

            Text nodes contribute their text directly; any other node that has
            children is descended into recursively.
            """
            retlist = []
            for node in nodelist:
                if node.nodeType == Node.TEXT_NODE:
                    retlist.append(node.wholeText)
                # hasChildNodes is a method and must be called; the bare
                # attribute is always truthy.
                elif node.hasChildNodes():
                    retlist.append(self.gettext(node.childNodes))

            # Raw string with a real backslash: the pattern "s+" would replace
            # runs of the letter "s" instead of whitespace.
            return re.sub(r"\s+", " ", "".join(retlist))

        def handleBook(self, node):
            """Process the <book> element.

            Non-element children are skipped. <title> is printed directly,
            <author> goes to handleAuthor and <chapter> to handleChapter.
            """
            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "title":
                    print("Book title is : %s" % self.gettext(child.childNodes))
                elif child.tagName == "author":
                    self.handleAuthor(child)
                elif child.tagName == "chapter":
                    self.handleChapter(child)

        def handleAuthor(self, node):
            """Process an <author> element.

            Non-element children are skipped. <name> goes to handleAuthorName;
            the text of <affiliation> is printed.
            """
            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "name":
                    self.handleAuthorName(child)
                elif child.tagName == "affiliation":
                    print("Author affiliation: %s" % self.gettext([child]))

        def handleAuthorName(self, node):
            """Print the author's name, surname first.

            The <last> and <first> descendants are located with
            getElementsByTagName and their text extracted with gettext.
            """
            surname = self.gettext(node.getElementsByTagName("last"))
            givenname = self.gettext(node.getElementsByTagName("first"))

            print("Author Name:%s %s " % (surname, givenname))

        def handleChapter(self, node):
            """Print a chapter heading, then each <para> child via handlePara.

            Non-element children and elements other than <para> are skipped.
            """
            print("*** Start of Chapter %s,%s" % (
                node.getAttribute("number"),
                self.gettext(node.getElementsByTagName("title"))))

            for child in node.childNodes:
                if child.nodeType != Node.ELEMENT_NODE:
                    continue
                if child.tagName == "para":
                    self.handlePara(child)

        def handlePara(self, node):
            """Print the text of a paragraph, re-wrapped with textwrap.fill."""
            paratext = self.gettext([node])
            paratext = textwrap.fill(paratext)
            print(paratext)
    
    
    # Parse Sample.xml and hand it to SampleScanner, whose constructor prints the book.
    doc = minidom.parse("Sample.xml")
    SampleScanner(doc)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un2.py
    Book title is : Sample XML Thing
    Author Name:Smith Benjamin
    Author affiliation: Springy Widgets,Inc.
    *** Start of Chapter 1,First chapter
    I think widgets are great.you should buy lots of them from Springy
    widgets,Inc

    Process finished with exit code 0

    3.使用Dom产生文档
    代码:

    #coding=utf-8
    """
    使用minidom生成XML
    1.创建Element,createElement
    2.添加子节点,appendChild
    3.创建Text,createTextNode
    4.创建属性,createAttribute
    """
    from xml.dom import minidom,Node
    
    # Create the empty document.
    doc = minidom.Document()
    # <book> is the root element.
    book = doc.createElement("book")
    doc.appendChild(book)
    # <title> of the book.
    title = doc.createElement("title")
    text = doc.createTextNode("Sample XML Thing")
    title.appendChild(text)
    book.appendChild(title)
    # <author>, containing a <name> made of <first> and <last>.
    author = doc.createElement("author")
    name = doc.createElement("name")
    first = doc.createElement("first")
    first.appendChild(doc.createTextNode("Benjamin"))
    name.appendChild(first)

    last = doc.createElement("last")
    last.appendChild(doc.createTextNode("Smith"))
    name.appendChild(last)

    author.appendChild(name)
    book.appendChild(author)
    # <author> is complete.

    # <chapter number="1"> with its own <title>.
    chapter = doc.createElement("chapter")
    chapter.setAttribute("number","1")
    title = doc.createElement("title")
    title.appendChild(doc.createTextNode("Fisrt Chapter"))
    chapter.appendChild(title)

    # <para>: a text node followed by a nested <company> element.
    # The literal is kept on one logical line; a newline inside a quoted
    # string is a syntax error.
    para = doc.createElement("para")
    para.appendChild(doc.createTextNode(
        "I think widgets are great.you should buy lots of them from"))
    company = doc.createElement("company")
    company.appendChild(doc.createTextNode("Springy widgets,Inc"))
    para.appendChild(company)

    chapter.appendChild(para)
    # <chapter> is complete.
    book.appendChild(chapter)
    # <book> is complete.

    print(doc.toprettyxml(indent = " "))
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un3.py

    <?xml version="1.0" ?>
    <book>
     <title>Sample XML Thing</title>
     <author>
      <name>
       <first>Benjamin</first>
       <last>Smith</last>
      </name>
     </author>
     <chapter number="1">
      <title>Fisrt Chapter</title>
      <para>
       I think widgets are great.you should buy lots of them from
       <company>Springy widgets,Inc</company>
      </para>
     </chapter>
    </book>
    

    Process finished with exit code 0

    4.dom类型参考

    8.3使用xml-rpc
    5.
    代码:

    #coding=utf-8
    import xmlrpclib
    # List the Meerkat category titles in alphabetical order.
    # NOTE(review): this URL looks like a blog post rather than an XML-RPC
    # endpoint (the NewsSource example uses the oreillynet.com server) - confirm.
    url = 'http://liandesinian.blog.51cto.com/7737219/1565474'
    s = xmlrpclib.ServerProxy(url)
    catdata = s.meerkat.getCategories()
    cattiles = sorted(item['title'] for item in catdata)
    for item in cattiles:
        print(item)
    

    运行结果:

    D:\python\python.exe E:/code/python/unit8/un6.py

    Process finished with exit code 0

    代码:

    #coding=utf-8
    import xmlrpclib,sys,textwrap
    
    class NewsCat:
        """One news category: its ``id`` and ``title``, ordered by title."""

        def __init__(self,catdata):
            self.id=catdata['id']
            self.title=catdata['title']

        def __cmp__(self, other):
            """Three-way comparison by title (Python 2 protocol)."""
            # Written without the cmp() builtin, which Python 3 removed.
            return (self.title > other.title) - (self.title < other.title)

        def __lt__(self, other):
            """Order by title so list.sort() works without __cmp__ support."""
            return self.title < other.title
    
    class NewsSource:
        """Interactive text browser for a Meerkat XML-RPC news server.

        The constructor connects to *url* and loads the category list.
        promotcat() asks the user for a category and dispact() lists and
        shows the items in it.
        """

        def __init__(self,url='http://www.oreillynet.com/meerkat/xml-rpc/server.php'):
            self.s=xmlrpclib.ServerProxy(url)
            self.loadcats()

        def loadcats(self):
            """Fetch all categories from the server into self.cats, sorted."""
            print("Loading categories....")
            catdata = self.s.meerkat.getCategories()
            self.cats = [NewsCat(item) for item in catdata]
            self.cats.sort()

        def displaycats(self):
            """Write the numbered category titles to stdout, three per row."""
            count = 0
            for count, item in enumerate(self.cats, 1):
                sys.stdout.write("%2d:%20.20s" % (count, item.title))
                if count % 3 == 0:
                    sys.stdout.write("\n")
            # Terminate the last row only if it is incomplete, so a full
            # last row is not followed by a blank line.
            if count % 3 != 0:
                sys.stdout.write("\n")

        def promotcat(self):
            """Show the categories and read the user's choice from stdin.

            Returns the zero-based index of the chosen entry in self.cats.
            Exits the program when the user enters 'q'.
            """
            self.displaycats()
            sys.stdout.write("select a catgory or q to quit")
            selection = sys.stdin.readline().strip()
            if selection == 'q':
                sys.exit(0)
            return int(selection) - 1

        def dispact(self, cat):
            """List the items of category *cat* and show the ones chosen.

            Loops until the user enters 'q'.
            """
            # NOTE(review): the main loop passes the list index returned by
            # promotcat() as 'category', not NewsCat.id - confirm which one
            # the server expects. Also confirm the 'data' flag (and the
            # item['data'] key in dispitem) against the Meerkat API.
            items = self.s.meerkat.getItems({'category': cat,
                                             'ids': 1,
                                             'descriptions': 1,
                                             'categories': 1,
                                             'channels': 1,
                                             'data': 1,
                                             'num_items': 15})
            if not items:
                print("Sorry,no items in that category.")
                sys.stdout.write("Press Enter to continue:")
                sys.stdin.readline()
                return
            while 1:
                # dispitemsummary prints by itself and returns None, so it
                # is called rather than printed.
                self.dispitemsummary(items)
                sys.stdout.write("select a catgory or q to quit")
                selection = sys.stdin.readline().strip()
                if selection == 'q':
                    return

                self.dispitem(items[int(selection) - 1])

        def dispitemsummary(self, items):
            """Print one numbered line with the title of each item."""
            for counter, item in enumerate(items, 1):
                print("%2d:%s" % (counter, item['title']))

        def dispitem(self, item):
            """Print one item in full and wait for the user to press Enter."""
            print("---%s---" % item['title'])
            print("Posted on %s" % item['data'])
            print("Description:")
            print(textwrap.fill(item['description']))
            print("\nlink: %s" % item['link'])
            sys.stdout.write("\nPress Enter to continue: ")
            sys.stdin.readline()


    # Main loop: pick a category, browse it, repeat until the user quits.
    # Guarded so importing this module does not contact the server.
    if __name__ == "__main__":
        n = NewsSource()
        while 1:
            cat = n.promotcat()
            n.dispact(cat)
    
    本性的苏醒,往往在遭遇真实之后。
  • 相关阅读:
    CAP原理、一致性模型、BASE理论和ACID特性
    MyBatis双数据源配置
    MySQL中间件Atlas安装及使用
    MySQL主从切换
    MySQL定时逻辑备份
    MySQL主从搭建
    zabbix监控nginx
    SVN Files 的值“ < < < < < < < .mine”无效。路径中具有非法字符。
    ie8下table的colspan属性与max-with属性的显示错乱问题
    MVC 自定义异常错误页面处理
  • 原文地址:https://www.cnblogs.com/chance88/p/6572879.html
Copyright © 2011-2022 走看看