zoukankan      html  css  js  c++  java
  • Python常用功能函数

    Python常用功能函数汇总

    1.按行写字符串到文件中

    import sys, os, time, json
    def saveContext(filename,*name):
        """Append one '^'-delimited record line to ``filename``.

        Every value in ``name`` is converted with ``str`` and the values are
        joined with '^'. Full-width punctuation in the joined text is replaced
        by its ASCII counterpart, then the text plus a trailing newline is
        appended to the file. The parent directory of ``filename`` is created
        when it does not exist yet.

        Args:
            filename: Path of the file to append to; surrounding whitespace
                is stripped before use.
            *name: Field values of the record. With no values an empty line
                is written.
        """
        delimiter = '^'
        context = delimiter.join([str(field) for field in name])
        # NOTE(review): the search strings had degraded to empty strings in
        # the original text; full-width punctuation is presumed from the
        # ASCII replacements -- confirm against the intended input data.
        for wide, narrow in (('（', '('), ('）', ')'), ('，', ','), ('：', ':')):
            context = context.replace(wide, narrow)
        # Strip leading/trailing whitespace from the path
        filename = filename.strip()
        # A bare file name has an empty dirname, which os.makedirs rejects,
        # so only create the directory when there is one to create
        path = os.path.dirname(filename)
        if path and not os.path.exists(path):
            os.makedirs(path)
        # The with-block closes the file even when the write fails
        with open(filename, 'a') as fp:
            fp.write(context + '\n')

    2.创建初始化浏览器

    #coding:utf-8
    import sys, os, time, json
    import urllib2
    from pyquery import PyQuery as pq
    from lxml import etree
    from selenium import webdriver
    from urlparse import urljoin 
    # Switch the interpreter-wide default string encoding to utf-8.
    # Python 2 idiom: sys is reloaded first so that setdefaultencoding is
    # available to call.
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    #初始化创建浏览器
    def init_drive():
        """Create a PhantomJS WebDriver configured for page scraping.

        The driver is started with a fixed desktop Chrome User-Agent, a fixed
        Referer header, image loading and disk cache enabled, and SSL errors
        ignored.

        Returns:
            The PhantomJS driver, with its page-load and script timeouts both
            set to 60.
        """
        ua = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.3 Safari/537.36"
        # Work on a copy: DesiredCapabilities.PHANTOMJS is a class-level dict,
        # so writing into it directly would change it for every later user
        cap = webdriver.DesiredCapabilities.PHANTOMJS.copy()
        cap["phantomjs.page.settings.resourceTimeout"] = 20000
        cap["phantomjs.page.settings.loadImages"] = True
        cap["phantomjs.page.settings.disk-cache"] = True
        # The User-Agent is set both as a page setting and as a custom header
        cap["phantomjs.page.settings.userAgent"] = ua
        cap["phantomjs.page.customHeaders.User-Agent"] = ua
        cap["phantomjs.page.customHeaders.Referer"] = "http://tj.ac.10086.cn/login/"
        # NOTE(review): no executable_path is passed, so the phantomjs binary
        # is presumably expected to be on PATH -- confirm for the target host
        driver = webdriver.PhantomJS(desired_capabilities=cap, service_args=['--ignore-ssl-errors=true'])
        driver.set_page_load_timeout(60)
        driver.set_script_timeout(60)
        return driver

    其中,获取网页html

    # Create the browser
    driver = init_drive()
    try:
        # Load the page and take the HTML as the browser currently holds it
        driver.get(url)
        html = driver.page_source
    finally:
        # Quit the browser even when loading fails, so the browser process
        # is not left running
        driver.quit()

    3.根据url获取网页Html函数

    #coding:utf-8
    import requests, json, time, re, os, sys, time
    import urllib2
    import random
    import numpy as np
    
    # Switch the interpreter-wide default string encoding to utf-8.
    # Python 2 idiom: sys is reloaded first so that setdefaultencoding is
    # available to call.
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    #最终获取url的数据
    def getHtml(url):
        """Fetch ``url`` and return the raw response body.

        The request carries a User-Agent header picked at random from a fixed
        list and is opened with ``timeout=60``.

        Args:
            url: Address to request.

        Returns:
            The response body exactly as read; no decoding is applied.
        """
        ua_list = [
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Mozilla/5.0 (Windows NT 6.1; rv2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
            "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        ]
        user_agent = random.choice(ua_list)
        request = urllib2.Request(url)
        request.add_header("User-Agent", user_agent)
        response = urllib2.urlopen(request, data=None, timeout=60)
        try:
            html = response.read()
        finally:
            # Release the connection even when reading the body fails
            response.close()
        # The body can be decoded here when the page encoding is known,
        # e.g. unicode(html, 'utf-8')
        return html

    4.获取时间的不同格式

    import time
    import sys
    import os
    import shutil
    import MySQLdb
    import urllib2
    from pyquery import PyQuery as pq
    from lxml import etree
    import urllib
    import sys 
    import httplib
    import datetime
    import json
    from selenium import webdriver
    from urlparse import urljoin 
    # Override httplib's protocol-version attributes so that its connections
    # identify as HTTP/1.0.
    # NOTE(review): presumably a workaround for servers whose HTTP/1.1
    # responses were read incompletely -- confirm it is still needed.
    httplib.HTTPConnection._http_vsn = 10
    httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
    
    # Switch the interpreter-wide default string encoding to utf-8.
    # Python 2 idiom: sys is reloaded first so that setdefaultencoding is
    # available to call.
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    #获取常用时间格式的函数
    #'%Y-%m-%d' 2017-11-18
    #'%Y%m%d'   20171118
    #'%Y%m%d%H' 2017111817
    #空或其他   2017-11-18 17:26:35
    def getTime(*format):
        """Return the current local time as a formatted string.

        Args:
            *format: Optional; only the first value is considered. The
                patterns '%Y-%m-%d', '%Y%m%d' and '%Y%m%d%H' are honoured
                as given.

        Returns:
            The current local time rendered with the selected pattern. With
            no argument, or any other value, '%Y-%m-%d %H:%M:%S' is used.
        """
        supported = ('%Y-%m-%d', '%Y%m%d', '%Y%m%d%H')
        fallback = '%Y-%m-%d %H:%M:%S'
        # Without arguments the empty tuple itself is compared, which matches
        # none of the supported patterns and therefore selects the fallback
        requested = format[0] if format else format
        pattern = fallback
        for candidate in supported:
            if requested == candidate:
                pattern = candidate
                break
        return time.strftime(pattern, time.localtime(time.time()))

    5.连接Mysql执行sql语句

    import time
    import sys
    import os
    import shutil
    import MySQLdb
    import urllib2
    from pyquery import PyQuery as pq
    from lxml import etree
    import urllib
    import sys 
    import httplib
    import datetime
    import json
    from selenium import webdriver
    from urlparse import urljoin 
    # Override httplib's protocol-version attributes so that its connections
    # identify as HTTP/1.0.
    # NOTE(review): presumably a workaround for servers whose HTTP/1.1
    # responses were read incompletely -- confirm it is still needed.
    httplib.HTTPConnection._http_vsn = 10
    httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
    
    # Switch the interpreter-wide default string encoding to utf-8.
    # Python 2 idiom: sys is reloaded first so that setdefaultencoding is
    # available to call.
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    #定义MySql数据库连接
    def conn_mysql(host='192.168.11.43',user='root',passwd='root',db='edw',port=3306):
        """Open a MySQL connection through MySQLdb.

        Args:
            host: Server host name or address.
            user: Login user name.
            passwd: Login password.
            db: Database to select after connecting.
            port: Server TCP port; defaults to 3306, the value that was
                previously fixed.

        Returns:
            The open connection, or '' (empty string) when connecting fails.
            A failure message is printed in that case, so callers should
            check the result before using it.
        """
        # NOTE(review): the default arguments embed a host and root
        # credentials; kept for backward compatibility -- consider moving
        # them to configuration.
        conn = ''
        try:
            conn = MySQLdb.connect(
                host=host,
                port=port,
                user=user,
                passwd=passwd,
                db=db,
            )
        except MySQLdb.Error:
            # Only database errors count as "connection failed"; anything
            # else (e.g. KeyboardInterrupt, wrong argument types) propagates
            print("连接mysql失败")
        return conn
    
    #执行sql语句返回结果    
    def excute_sql(conn,sql):
        """Run one SQL statement on ``conn`` and return the fetched rows.

        Before the statement runs, the session character-set and collation
        variables are set to utf8. On success the work is committed. The
        cursor and the connection are closed in every case, so ``conn``
        cannot be reused after this call.

        Args:
            conn: Open database connection providing ``cursor()``,
                ``commit()`` and ``close()``.
            sql: Statement text. It is executed as-is without parameter
                binding, so it must not be built from untrusted input.

        Returns:
            All rows returned by ``cursor.fetchall()`` for the statement.
        """
        session_setup = (
            'set character_set_client = utf8',
            'set character_set_server = utf8',
            'set character_set_connection = utf8',
            'set character_set_results = utf8',
            'set collation_connection = utf8_general_ci',
            'set collation_server = utf8_general_ci',
        )
        try:
            cur = conn.cursor()
            try:
                for statement in session_setup:
                    cur.execute(statement)
                cur.execute(sql)
                result = cur.fetchall()
            finally:
                # Close the cursor even when a statement fails
                cur.close()
            conn.commit()
        finally:
            # Close the connection on every path; nothing is committed when
            # an error occurred above
            conn.close()
        return result
  • 相关阅读:
    sentinel使用内置规则检测威胁——自定义规则是使用的KQL
    在Azure Sentinel中使用威胁情报——可以自己订阅,自己创建一条indicator来使用基于情报的检测
    sm2国密算法的纯c语言版本,使用于单片机平台(静态内存分配)
    JDK-8180048 : Interned string and symbol table leak memory during parallel unlinking
    CMS垃圾收集器小实验之CMSInitiatingOccupancyFraction参数
    记spring boot线上项目内存优化
    springboot 配置log4j2日志,并输出到文件
    SpringBoot 日志管理之自定义Appender
    Linux 上 定时备份postgresql 数据库的方法
    linux下执行sh脚本,提示Command not found解决办法
  • 原文地址:https://www.cnblogs.com/Jims2016/p/8445978.html
Copyright © 2011-2022 走看看