zoukankan      html  css  js  c++  java
  • 嘿!我用python帮我干这些事

      python 无疑是当下火上天的语言,但是我们又不拿来工作,那么能拿来干啥呢?我是这么干的。

    1. 平时工作开发用不上,就当个计算器吧!

    python
    # 加减乘除
    >>> (3 + 2) - 5 * 1
    0
    # 位运算
    >>> 3 << 2
    12
    # x 的 y 次方用 ** 运算(Python 里 ^ 是按位异或,不是幂运算);指数为小数时同样可以开方,如 9 ** 0.5
    >>> 3 ** 2
    9
    # 用另一种计算幂次方的运算,可以开方运算
    >>> pow(9, 0.5)
    3.0
    # 作进制转换,如二进制转换,十进制转n进制
    >>> bin(2)
    '0b10'
    >>> hex(25)
    '0x19'
    >>> oct(10)
    '012'
    >>> int('e0', 16)
    224
    # 将十进制转换为二进制,以全0占位形式显示二进制,更方便查看,默认为32位,使用如下图所示
    def decbin(i, bit=32):
        """Return ``i`` as a binary string zero-padded to ``bit`` digits.

        Only the low ``bit`` bits of ``i`` are kept, so a negative number is
        rendered in its two's-complement form.
        """
        mask = (1 << bit) - 1
        digits = format(i & mask, 'b')
        return digits.zfill(bit)

    2. 做简单爬虫
    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import urllib,urllib2
    import re
    import os
    import HTMLParser
    # Local directory under which the remote tree is mirrored.
    dirbase = '/tmp'
    urlbase = 'http://hg.openjdk.java.net'
    # Start page of the crawl (the original note also mentions /jdk and /hotspot).
    url = urlbase + '/jdk8u/jdk8u/jdk/file/dddb1b026323/src'
    # Optional resume point: when non-empty, the crawler skips links until it
    # meets this exact path; skip_find records that the path has been seen.
    skip_to_p = ''
    skip_find = False
    # Query-string parameters appended to the start URL.
    textmod = {'user': 'admin', 'password': 'admin'}
    textmod = urllib.urlencode(textmod)
    print(url)
    req = urllib2.Request(url='%s%s%s' % (url, '?', textmod))
    resp = urllib2.urlopen(req)
    try:
        res = resp.read()
    finally:
        # Release the connection even if reading the body fails.
        resp.close()
    # Paths that have already been fetched; shared with the recursive crawler.
    allflist = []

    # The directory listing is the striped <tbody> of the page.
    table = re.findall(r'<tbody class="stripes2">(.+)</tbody>', res, re.S)

    # Entry links of the listing, skipping anchors whose text starts with
    # 'u' or 'p'.  An unexpected page layout yields an empty list instead of
    # an IndexError.
    harr = re.findall(r'href="(/jdk8u[\w/._]+)">(?![up])', table[0]) if table else []
    
    def down_src_recursion(harr):
        """Recursively mirror the pages linked from ``harr`` under ``dirbase``.

        Every entry of ``harr`` is a site-relative path (``/jdk8u/...``).  A
        page containing ``class="sourcefirst"`` is treated as a source file and
        its lines are written to ``dirbase + path``; any other page is treated
        as a directory listing, which is created locally and crawled in turn.

        A ``<path>.tmp`` lock file marks a download in progress so that several
        runs can share the same target directory.  The lock is removed when the
        entry is finished, whether or not it succeeded.

        Uses the module-level ``dirbase``, ``urlbase``, ``skip_to_p``,
        ``skip_find`` and ``allflist``.

        :param harr: list of site-relative paths to process.
        :returns: ``False`` when ``harr`` is empty, otherwise ``None``.
        """
        global allflist, skip_find
        if not harr:
            return False
        arrlen = len(harr)
        # After meeting a lock file, this many following entries are skipped
        # as well; lock_conflict_jumping is the number still left to skip.
        lock_conflict_jump_max = 2
        lock_conflict_jumping = 0
        # Paths with these suffixes are assumed to be files and get locked
        # before the download starts.
        lockable_suffixes = ('.gif', '.jpg', '.png', '.xml', '.cfg', '.properties',
                             '.make', '.sh', '.bat', '.html', '.c', '.cpp', '.h',
                             '.hpp', '.java', '.1')
        print("in new dir cur...")
        if len(allflist) > 1500:
            print('over 1500, cut to 800 exists...')
            allflist = allflist[-800:]
        for i, alink in enumerate(harr, 1):
            alink = alink.rstrip('/')
            if skip_to_p and not skip_find:
                if alink != skip_to_p:
                    print('skip file, cause no find..., skip=%s,now=%s' % (skip_to_p, alink))
                    continue
                skip_find = True
            if alink in allflist:
                print('目录已搜寻过:' + alink)
                continue
            pa = dirbase + alink
            if os.path.isfile(pa):
                print('文件已存在,无需下载: ' + pa)
                continue
            lockfile = pa + '.tmp'
            if os.path.isfile(lockfile):
                lock_conflict_jumping = lock_conflict_jump_max
                print('文件正在下载中,跳过+%s...: %s' % (lock_conflict_jumping, lockfile))
                continue
            if lock_conflict_jumping > 0:
                lock_conflict_jumping -= 1
                print('文件正在下载中,跳过+%s...: %s' % (lock_conflict_jumping, lockfile))
                continue
            # The lock file and the download both need the parent directory.
            parent = os.path.dirname(pa)
            if not os.path.isdir(parent):
                os.makedirs(parent, mode=0o777)
            # Take the lock before the slow network fetch: marking the entry
            # only after the download would be too late for a concurrent run.
            if pa.endswith(lockable_suffixes):
                os.mknod(lockfile)
            try:
                resp = urllib2.urlopen(urllib2.Request(urlbase + alink))
                try:
                    rest = resp.read()
                finally:
                    resp.close()
                allflist.append(alink)
                if 'class="sourcefirst"' in rest:
                    print('这是个资源文件:%s         %d/%d' % (alink, i, arrlen))
                    if not os.path.isfile(lockfile):
                        os.mknod(lockfile)
                    linearr = re.findall(r'<span id=".+">(.+)</span>', rest)
                    unescape = HTMLParser.HTMLParser().unescape
                    with open(pa, 'w') as fileObject:
                        for line in linearr:
                            try:
                                line = unescape(line)
                            except UnicodeDecodeError as e:
                                # Keep the raw line when it cannot be unescaped.
                                print('oops, ascii convert error accour: %s' % e)
                            fileObject.write(line + '\n')
                else:
                    print('这是目录:%s        %d/%d' % (alink, i, arrlen))
                    if not os.path.exists(pa):
                        print('创建目录:%s' % alink)
                        os.makedirs(pa, mode=0o777)
                    ta = re.findall(r'<tbody class="stripes2">(.+)</tbody>', rest, re.S)
                    if ta:
                        ha = re.findall(r'href="(/jdk8u[\w/._]+)">(?![up])', ta[0])
                        down_src_recursion(ha)
            finally:
                # Never leave a stale lock behind, including after an error or
                # when a locked path turned out to be a directory.
                if os.path.isfile(lockfile):
                    os.remove(lockfile)
    
    # Start crawling from the links collected on the start page.
    down_src_recursion(harr)

    以上python2 版本的爬虫,在python3中则要改编下呢!
    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    # for python3
    
    import urllib.parse
    import urllib.request
    import re
    import os
    import html
    # Local directory under which the remote tree is mirrored.
    dirbase = '/tmp'
    urlbase = 'http://hg.openjdk.java.net'
    url = urlbase + '/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/sun/misc'
    # Optional resume point: when non-empty, the crawler skips links until it
    # meets this exact path; skip_find records that the path has been seen.
    # Example:
    # skip_to_p = '/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/sun/misc'
    skip_to_p = ''
    skip_find = False
    # Query-string parameters appended to the start URL.
    textmod = {'user': 'admin', 'password': 'admin'}
    textmod = urllib.parse.urlencode(textmod)
    print(url)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(url='%s%s%s' % (url, '?', textmod)) as resp:
        res = resp.read().decode('utf-8')
    # Paths that have already been fetched; shared with the recursive crawler.
    allflist = []

    # The directory listing is the striped <tbody> of the page.
    table = re.findall(r'<tbody class="stripes2">(.+)</tbody>', res, re.S)

    # Entry links of the listing, skipping anchors whose text starts with
    # 'u' or 'p'.  An unexpected page layout yields an empty list instead of
    # an IndexError.
    harr = re.findall(r'href="(/jdk8u[\w/._]+)">(?![up])', table[0]) if table else []
    
    def down_src_cur(harr):
        """Recursively mirror the pages linked from ``harr`` under ``dirbase``.

        Every entry of ``harr`` is a site-relative path (``/jdk8u/...``).  A
        page containing ``class="sourcefirst"`` is treated as a source file and
        its lines are written to ``dirbase + path``; any other page is treated
        as a directory listing, which is created locally and crawled in turn.
        Entries that fail to download are reported and skipped.

        Uses the module-level ``dirbase``, ``urlbase``, ``skip_to_p``,
        ``skip_find`` and ``allflist``.

        :param harr: list of site-relative paths to process.
        :returns: ``False`` when ``harr`` is empty, otherwise ``None``.
        """
        global allflist, skip_find
        if not harr:
            return False
        arrlen = len(harr)
        print("- In new dir cur...")
        # Bound the memory used by the visited list on very large trees.
        if len(allflist) > 1500:
            print('- Over 1500, cut to 800 exists...')
            allflist = allflist[-800:]
        for i, alink in enumerate(harr, 1):
            alink = alink.rstrip('/')
            if skip_to_p and not skip_find:
                if alink != skip_to_p:
                    print('- Skip file, cause no find..., skip=%s,now=%s' % (skip_to_p, alink))
                    continue
                skip_find = True
            if alink in allflist:
                print('- Searched before:' + alink)
                continue
            try:
                with urllib.request.urlopen(urlbase + alink) as res:
                    rest = res.read().decode('utf-8')
            except Exception as e:
                # Deliberately broad: one bad page must not abort the crawl.
                print(e)
                print(" ERROR accour, continue;")
                continue
            allflist.append(alink)
            pa = dirbase + alink
            if 'class="sourcefirst"' in rest:
                print('- Code resourse:%s         %d/%d' % (alink, i, arrlen))
                linearr = re.findall(r'<span id=".+">(.+)</span>', rest)
                # Entries of the start page have no local parent directory yet.
                os.makedirs(os.path.dirname(pa), exist_ok=True)
                # The page was decoded as UTF-8, so write it back the same way
                # instead of relying on the locale's default encoding.
                with open(pa, 'w', encoding='utf-8') as fileObject:
                    fileObject.writelines(html.unescape(line) + '\n' for line in linearr)
            else:
                print('- Directory:%s        %d/%d' % (alink, i, arrlen))
                if not os.path.exists(pa):
                    print('makedirs:%s' % alink)
                    os.makedirs(pa, mode=0o777)
                ta = re.findall(r'<tbody class="stripes2">(.+)</tbody>', rest, re.S)
                if ta:
                    ha = re.findall(r'href="(/jdk8u[\w/._]+)">(?![up])', ta[0])
                    # Crawl the sub-directory.
                    down_src_cur(ha)
    
    # Start crawling from the links collected on the start page.
    down_src_cur(harr)

    3. 做文件搜索,替换:
    4. 做简单代码验证
    # 做简单字符查找验证
    >>> '234234fdgdfs'.find('f')
    6
    >>> '234234fdgdfs'.index('f')
    6
    >>> '234234fdgdfs'[2:5]
    '423'
    # 做正则匹配
    >>> re.findall(r'[a-zA-Z0-9]*.[a-zA-Z1-9]*[.|com]*', 'www.baidu.com')
    ['www.baidu.com']

    5. 写个运维脚本,监听本机8080端口的运行状态,如果发现挂了,就发送邮件通知主人,并重启服务器。

    #!/usr/bin/env python
    #!coding=utf-8
    import os
    import time
    import sys
    import smtplib
    from email.mime.text import MIMEText
     
    def send_email(warning):
        """Send ``warning`` as a plain-text alert mail.

        Any failure (connection, login, delivery) is printed and swallowed so
        that the monitoring loop calling this function keeps running.

        :param warning: text used as the mail body.
        :returns: ``None``.
        """
        to_mail = 'xx@163.com'
        from_mail = 'xx@163.com'
        msg = MIMEText(warning)
        msg['Subject'] = 'python send warning mail'
        msg['From'] = '测试了<rootrr@163.com>'
        msg['To'] = to_mail
        try:
            smtp = smtplib.SMTP()
            try:
                smtp.connect(r'smtp.qiye.163.com')
                # NOTE(review): credentials are hard-coded; load them from the
                # environment or a config file before real use.
                smtp.login('xx@163.com', 'xxx123')
                smtp.sendmail(from_mail, to_mail, msg.as_string())
            finally:
                # Release the socket even when login or delivery fails.
                smtp.close()
            print('send mail to %s, content is: %s' % (to_mail, msg))
        except Exception as e:
            print("Send mail Error: %s" % e)
    def _listening_on_8080():
        """Return the netstat output lines that mention ":8080" (empty list if none)."""
        pipe = os.popen('netstat -tulnp | grep ":8080"', 'r')
        try:
            return pipe.readlines()
        finally:
            pipe.close()

    # Monitoring loop: poll the port forever until interrupted with Ctrl-C.
    while True:
        try:
            http_status = _listening_on_8080()
            if not http_status:
                # Nothing listens on 8080: restart the service and notify.
                os.system('service tomcat7 start')
                time.sleep(3)    # give the service time to start
                new_http_status = _listening_on_8080()
                str1 = ''.join(new_http_status)
                is_port = -1
                send_email(warning="8080 port shutdown, This is a warning!!!")
                try:
                    # Fourth netstat column is the local address; the port
                    # follows its last ':'.
                    is_port = str1.split()[3].split(':')[-1]
                except IndexError as e:
                    print("out of range: %s" % e)
                if is_port != '8080':
                    print('tomcat 启动失败')
                else:
                    print('tomcat 启动成功')
            else:
                print('8080端口正常')
            time.sleep(5)
        except KeyboardInterrupt:
            sys.exit('out order\n')

    6. 科学计算,大数据,图形识别。。。  

      看工作需要!

    以下命令为反向kill某个端口的服务

    # netstat -tunlp | grep ':8080' | awk '{split($7, arr, "/"); print(arr[1])}' | xargs -r kill -9
     
     
  • 相关阅读:
    45 个非常有用的 Oracle 查询语句
    [转载]java图片缩放处理
    [转载]java图片缩放处理
    十步完全理解SQL
    十步完全理解SQL
    day04_20170521_函数(二)
    to disable the entity lazy load, The ObjectContext instance has been disposed and can no longer be used for operations that require a connection.
    wordpress mobile templates
    linq query, using int.parse to convert varchar to int while orderby
    appfabric 简单应用
  • 原文地址:https://www.cnblogs.com/yougewe/p/9454111.html
Copyright © 2011-2022 走看看