zoukankan      html  css  js  c++  java
  • Python开发Http代理服务器 socketref,呆在autonavi.com C++博客

    Python开发Http代理服务器 - socketref,呆在autonavi.com - C++博客

    这是之前开发酒店广告投放系统时编写的 HTTP 代理服务程序,功能是把广告插入到 HTML DOM 结构内。一般插入的是 DIV,当然也包括 script,这些内容都可以从数据库中动态获得。
    简单修改之后可以当做普通的 HTTP 代理服务器使用:在浏览器中把代理地址设置为运行该程序的机器的 IP 即可,只要运行代理程序的机器能上网,客户机便能上网(其中处理 gzip 压缩页面的部分有点麻烦)。

# -*- coding:utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# socketref@hotmail.com 
# www.sw2us.com

# For Python this line is a bare string expression and therefore a no-op.
# NOTE(review): presumably the sh/Python polyglot launcher idiom -- when the
# file is fed to a shell, the line re-executes it with `python -O`.
"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2us HTTP Proxy.

"""

# Appended to ProxyHandler.server_version ("TinyHTTPProxy/<version>").
__version__ = "0.2.1"
     15 
     16 import BaseHTTPServer, select, socket, SocketServer, urlparse
     17 import httplib,traceback,re
     18 import os,sys,re,mimetools,zlib,StringIO,gzip,time,StringIO
     19 
     20 
     21 class ConfigProperty:
     22     def __init__(self,owner):
     23         self.key=''
     24         self.value=''
     25     
     26     def create(self,text):
     27         #text -  key=value
     28         #@return: boolean
     29         pos = text.find('#')
     30         if(pos !=-1):
     31             text = text[:pos]
     32         pair = text.split('=')
     33         if len(pair) !=2:
     34             #print "Property Line Invalid:%s"%(text)
     35             return False
     36         k = pair[0].strip()
     37         v = pair[1].strip()
     38         self.key = k
     39         self.value = v
     40 
     41         return True
     42     
     43     def toString(self):
     44         s =''
     45         try:            
     46             s = "%s=%s"%(self.key,self.value)            
     47         except:
     48             return ''
     49         return s
     50     
     51     def toInt(self):
     52         r=0
     53         try:
     54             r = int(self.value)
     55         except:
     56             r =0
     57         return r
     58     
     59     def toFloat(self):
     60         r=0.0
     61         try:
     62             r = float(self.value)
     63         except:
     64             r=0.0
     65         return r
     66     
     67     
     68 #@def SimpleConfig
     69 # 简单配置信息文件,基本格式 : key=value
     70 class SimpleConfig:
     71     def __init__(self):
     72         self._file=''
     73         self._props=[]
     74         self._strip = True
     75         
     76     def open(self,file,strip=True):
     77         #打开配置文件
     78         #@param strip - 是否裁剪不可见首尾两端的字符
     79         try:
     80             self._strip = strip 
     81             self._props=[]
     82             fh = open(file,'r')
     83             lines = fh.readlines()            
     84             for text in lines:                
     85                 prop = ConfigProperty(self)
     86                 if prop.create(text) == False:                    
     87                     prop = None
     88                 else:                    
     89                     self._props.append(prop)                    
     90             fh.close()
     91         except:            
     92             return False
     93         return True
     94 
     95     def toString(self):
     96         s=''
     97         for p in self._props:
     98             s = s + p.toString() +"\n"
     99         return s
    100     
    101     def saveAs(self,file):
    102         #保存配置信息到文件
    103         try:
    104             fh = open(file,'w')
    105             fh.write(toString())
    106             fh.close()
    107         except:
    108             print "write File Failed!"
    109             return False
    110         return True
    111     
    112     def getProperty(self,name):
    113         #取属性值
    114         prop=None
    115         try:
    116             for p in self._props:
    117                 if p.key == name:
    118                     prop = p
    119                     break
    120         except:
    121             pass
    122         
    123         return prop
    124     
    125     def getPropertyValue(self,key,default=''):
    126         prop = self.getProperty(key)
    127         if not prop:
    128             return default
    129         return prop.value
    130     
    131     def getPropertyValueAsInt(self,name,default=0):
    132         prop = self.getPropertyValue(name)
    133         
    134         if not prop:
    135             return default
    136         r=default
    137         try:
    138             r = int(prop)
    139         except:pass
    140         return r
    141     
    142     def getPropertyValueAsFloat(self,name,default=0.0):
    143         prop = self.getPropertyValue(name)
    144         if not prop:
    145             return default
    146         r = default
    147         try:
    148             r = float(r)
    149         except:pass
    150         return r
    151     
    152 
    153 #===========================================#
    154 
    155     
    156 #===========================================#
    157 
    158 def getMacList():
    159     maclist=[]
    160     f = os.popen('arp -a','r')
    161     while True:
    162         line  = f.readline()
    163         if not line:
    164             break
    165         line = line.strip()
    166         rst = re.match('^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+([0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}).*',line)
    167         #rst = re.match('^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',line)
    168         if rst:
    169             #print rst.groups()
    170             maclist.append(rst.groups())
    171     #print maclist
    172     return maclist
    173 
    174 
    175         
    176 ##########################################
# Module-wide configuration, loaded at import time from 'proxy.conf'
# (a relative path, so it is resolved against the current working directory).
confile = SimpleConfig()
# The boolean result of open() is ignored: if the file cannot be read the
# configuration simply stays empty and look-ups fall back to their defaults.
confile.open('proxy.conf')
# Not referenced anywhere else in the visible code.
dbconn = None
    180 
    181 ##########################################
    182 #初始化系统配置
    183 def initConfiguration():
    184     r = True
    185     
    186     return r
    187 
    188 ##########################################
    189 
    190 class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
    191     __base = BaseHTTPServer.BaseHTTPRequestHandler
    192     __base_handle = __base.handle
    193     server_version = "TinyHTTPProxy/" + __version__
    194     rbufsize = 0                        # self.rfile Be unbuffered
    195 
    196 
    197 #######################################################33
    198 
    199     #handle()是在单独线程中执行
    200     def handle(self): # 调用入口,线程刚进入,携带socket进入
    201         print 'client incoming'
    202         #self.__base_handle()
    203         #return 
    204         (ip, port) =  self.client_address
    205         if hasattr(self, 'allowed_clients'and ip not in self.allowed_clients:
    206             self.raw_requestline = self.rfile.readline()
    207             if self.parse_request():
    208                 self.send_error(403)
    209         else:
    210             self.__base_handle()
    211 
    212     def _connect_to(self, netloc, soc):
    213         i = netloc.find(':')
    214         if i >= 0:
    215             host_port = netloc[:i], int(netloc[i+1:])
    216         else:
    217             host_port = netloc, 80
    218         #print "\t" "connect to %s:%d" % host_port
    219         try: soc.connect(host_port)
    220         except socket.error, arg:
    221             try: msg = arg[1]
    222             except: msg = arg
    223             self.send_error(404, msg)
    224             return 0
    225         return 1
    226 
    227     def do_CONNECT(self):
    228         soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    229         try:
    230             if self._connect_to(self.path, soc):
    231                 self.log_request(200)
    232                 self.wfile.write(self.protocol_version +
    233                                                  " 200 Connection established\r\n")
    234                 self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
    235                 self.wfile.write("\r\n")
    236                 self._read_write(soc, 300)
    237         finally:
    238             print "\t" "bye"
    239             soc.close()
    240             self.connection.close()
    241 
    242         
    243     def do_GET(self):    
    244         (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
    245                 self.path, 'http')
    246         piars = (scm, netloc, path, params, query, fragment)
    247         if not netloc:
    248             netloc = self.headers.get('Host'"")
    249         #print ">>requester:",self.connection.getpeername(),"path:",self.path
    250         #print '>>2. ',(scm, netloc, path, params, query, fragment)
    251         #print 'next host:',netloc
    252         if scm != 'http' or fragment or not netloc:
    253             self.send_error(400"bad url %s" % self.path)
    254             return
    255         soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    256         try:
    257             if self._connect_to(netloc, soc):
    258                 self.log_request()
    259                 soc.send("%s %s %s\r\n" % (
    260                         self.command,
    261                         urlparse.urlunparse(('''', path, params, query, '')),
    262                         self.request_version))
    263                 self.headers['Connection'= 'close'
    264                 del self.headers['Proxy-Connection']
    265                 for key_val in self.headers.items():
    266                     soc.send("%s: %s\r\n" % key_val)
    267                 soc.send("\r\n")
    268                 #到此完成发送请求和头部信息
    269                 self._read_write(soc)
    270         finally:
    271             print "\t" "bye"
    272             soc.close()
    273             self.connection.close()    
    274         
    275 
    276     
    277     def insertTags(self,tag,body,insert):
    278         p1 = body.find('<%s'%tag)
    279         if p1!=-1 :
    280             p2 = body.find('>',p1)
    281             if p2!=-1:
    282                 part1 = body[:p2+1]
    283                 part2 = body[p2+1:]
    284                 print '*-'*20
    285                 body = part1 + insert + part2
    286         return body
    287     
    288     # google页面的数据请求时,返回的数据进行的是gzip压缩,所以过滤文本存在问题,先要解压缩之后才可以
    289     # 插入数据之后要重新计算 content-length 并返回给客户浏览器
    290     # 发现压缩的有很多 , content-encoding:gzip
    291     
    292     # 处理 'transfer-encoding': 'chunked'类型
    293     #gzip 有两种存储,一种是直接gzip压缩的数据跟在header之后;另外一种是采用chunck块存储
    294     #在这里将gzip数据全部解压,还原成原始数据传出到客户端
    295     def sendBackResponse(self,command,headers,body):
    296         
    297         insert='<h1>This is Test </h1>'
    298         if headers.has_key('content-encoding'and headers['content-encoding'].strip().lower()=='gzip':
    299             try:
    300                 del headers['content-encoding']
    301                 gzipdata=''                
    302                 if headers.has_key('transfer-encoding'and headers['transfer-encoding']=='chunked':
    303                     del headers['transfer-encoding']
    304                     
    305                     pos = 0
    306                     while pos < len(body):
    307                         p = body.find('\x0d\x0a',pos)
    308                         sizewidth = p-pos
    309                         
    310                         chuncksize = int(body[pos:p],16)
    311                         #print 'chunck size:',body[pos:p]
    312                         p +=2 
    313                         gzipdata+=body[p:p+chuncksize]
    314                         pos= p+chuncksize+2
    315                         if chuncksize ==0 :
    316                             break
    317                     #
    318                     body = gzipdata
    319                     
    320 #
    321                 
    322                     #ss = zlib.decompress(gzipdata)
    323                 compressedstream = StringIO.StringIO(body)
    324                 gzipper = gzip.GzipFile(fileobj=compressedstream)
    325                 if gzipper == None:
    326                     print '*'*200
    327                 body = gzipper.read()
    328                 #f = open('body%s.txt'%time.time(),'wb')                    
    329                 #f.write(body)
    330                 #f.close()
    331                     
    332                 
    333                     #body = gzipdata
    334             except:
    335                 print traceback.print_exc()
    336                 print 'decompress failed!'
    337                 #pos = body.find('\x0d\x0a')
    338                 #pos = body.find('\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff')
    339                 #if pos!=-1:
    340                 #    body = body[pos+9:]
    341                 #    
    342                 #compressedstream = StringIO.StringIO(body)
    343                 #gzipper = gzip.GzipFile(fileobj=compressedstream)
    344                 #if gzipper == None:
    345                 #    print '*'*200
    346                 #body = gzipper.read()
    347                 
    348                 #body = zlib.decompressobj().decompress('x\x9c'+body)
    349                 
    350         #m = re.search('(<body.*>)',body,re.I)
    351         #if m:
    352         #    pos = m.start(0)
    353         #    part1 = body[:pos+len(m.group(0))]
    354         #    part2 = body[pos+len(m.group(0)):]
    355         #    body = part1 + insert + part2
    356         #    print '-*'*20,insert,'-*'*20
    357         
    358         #self.insertTags('body',body,insert)
    359         
    360         css=""" <style>
    361 #kk{
    362 border:1px dotted red;
    363 200px;
    364 height:300px;
    365 float:left;
    366 background:#0x00ff00;
    367 }
    368 </style>
    369 """
    370         #body =self.insertTags('head',body,css)
    371         
    372         #body =self.insertTags('body',body,insert)
    373         div="""
    374         <div id="kk">
    375         This is Test DIV Block!!
    376 </div> 
    377         """
    378         
    379         #read external html tags
    380         try:
    381             #ff = open('head.tag','r')
    382             #div = ff.read()
    383             #ff.close()
    384             #body =self.insertTags('head',body,div)
    385             body = self.publish_advertisement(body) #插入配置的广告信息
    386         except:
    387             pass
    388         
    389         #p1 = body.find('<body')
    390         #if p1!=-1 :
    391         #    p2 = body.find('>',p1)
    392         #    if p2!=-1:
    393         #        part1 = body[:p2+1]
    394         #        part2 = body[p2+1:]
    395         #        print '*-'*20
    396         #        body = part1 + insert + part2
    397             #print m.group(0)
    398         headers['Content-Length'= str(len(body))
    399             
    400         #if headers.has_key('content-length'):
    401             
    402         self.connection.send(command)
    403         self.connection.send('\r\n')
    404         for k,v in headers.items():
    405             self.connection.send("%s: %s\r\n"%(k,v))
    406         self.connection.send("\r\n")
    407         self.connection.sendall(body)
    408         
    409 
    410         
    411 #----------------------------------------------------
    412 
    413     def _read_write(self, soc, max_idling=20):
    414         #getMacList()
    415         iw = [self.connection, soc] # self.connnection - 内网主机连接,soc - 向外连接
    416         ow = []
    417         count = 0
    418         #respfile = soc.makefile('rb', 1024)
    419         httpCommand=''
    420         httpBody=''
    421         httpHeaders={}
    422         isOkPageResponse=False
    423         nextReadBytes=0
    424         datacnt=0
    425         NoContentLength = False
    426         #print self.connection.getpeername()
    427         while 1:
    428             count += 1
    429             datacnt+=1
    430             (ins, _, exs) = select.select(iw, ow, iw, 3)
    431             if exs:
    432                 print 'error occr!'
    433                 break #异常产生
    434             if ins:
    435                 for i in ins:
    436                     if i is soc:
    437                         out = self.connection
    438                     else:
    439                         out = soc
    440                     
    441                     data = i.recv(8192)
    442                     if data:                        
    443                         out.send(data)
    444                         count = 0
    445                     else:
    446                         if not isOkPageResponse:
    447                             return 
    448             else:
    449                 pass #print "\t" "idle", count
    450             if count == max_idling:
    451                 print 'idling exit'    
    452                 break  # 指定时间内都接收不到双向数据便退出循环 20*3 = 60 secs
    453         
    454 
    455     do_HEAD = do_GET
    456     do_POST = do_GET
    457     do_PUT  = do_GET
    458     do_DELETE=do_GET
    459 
    460 class ThreadingHTTPServer (SocketServer.ThreadingMixIn,
    461                            BaseHTTPServer.HTTPServer): pass
    462 
    463 
    464 
    465 
    466 def serving(HandlerClass,
    467         ServerClass, protocol="HTTP/1.0"):
    468     
    469     if len(sys.argv) <2  or sys.argv[1]!='www.sw2us.com':
    470         sys.exit()
    471     
    472     if sys.argv[2:]:
    473         port = int(sys.argv[2])
    474     else:
    475         
    476         port = confile.getPropertyValueAsInt('httpport',8000)
    477         
    478         #port = 8000
    479         
    480     server_address = ('', port)
    481 
    482     HandlerClass.protocol_version = protocol
    483     httpd = ServerClass(server_address, HandlerClass)
    484 
    485     sa = httpd.socket.getsockname()
    486     print "www.sw2us.com@2010 v.1.0.0"
    487     print "Serving HTTP on", sa[0], "port", sa[1], ""
    488     sys.stdout = buff
    489     sys.stderr = buff
    490         
    491     httpd.serve_forever()
    492         
    493         
    494         
    495 if __name__ == '__main__':
    496     #getMacList()
    497     from sys import argv
    498     
    499     f = open('proxy.pid','w')
    500     f.write(str(os.getpid()))
    501     f.close()
    502     
    503     #ProxyHandler.allowed_clients = []
    504     try:
    505         allowed = []
    506         ss = confile.getPropertyValue('allowed_clients').strip()
    507         hosts = ss.split(',')
    508         for h in hosts:
    509             if h:
    510                 client = socket.gethostbyname(h.strip())
    511                 allowed.append(client)
    512         if len(allowed):
    513             ProxyHandler.allowed_clients = allowed    
    514         buff = StringIO.StringIO()
    515 
    516         serving(ProxyHandler, ThreadingHTTPServer)
    517     except:
    518         pass
  • 相关阅读:
    idea 找不到包或找不到符号
    JOISC部分题解
    欧拉数学习笔记
    [清华集训2017]生成树计数
    [ZJOI2019]开关
    【题解】CF817E Choosing The Commander
    CSP-S 2020游记
    【学习笔记】线段树合并
    【题解】[IOI2005]Riv 河流
    【题解】哈希冲突
  • 原文地址:https://www.cnblogs.com/lexus/p/2476701.html
Copyright © 2011-2022 走看看