  • Scripts: commonly used scripts

    remove_all_pyc

    find . -name "*.pyc" -exec git rm -f {} \;
    

    find_all_links

    import requests
    import re
    
    # get url
    url = input('Enter a URL (include `http://`): ')
    
    # connect to the url
    website = requests.get(url)
    
    # read html
    html = website.text
    
    # use re.findall to grab all the links
    links = re.findall('"((http|ftp)s?://.*?)"', html)
    
    # output links
    for link in links:
        print(link[0])
    

    rename_with_slice

    import os
    import glob
    
    os.chdir("/Users/mikeherman/repos/bugs/se-platform/se/core/permissions")
    for file in glob.glob("*.json"):
        file_name = os.path.splitext(file)[0]
        extension = os.path.splitext(file)[1]
        new_file_name = file_name[:-6] + extension  # drop the last six characters of the name
        try:
            os.rename(file, new_file_name)
        except OSError as e:
            print(e)
        else:
            print("Renamed {} to {}".format(file, new_file_name))
    

    load_json_without_dupes

    def dict_raise_on_duplicates(ordered_pairs):
        """reject duplicate keys"""
        my_dict = dict()
        for key, values in ordered_pairs:
            if key in my_dict:
                raise ValueError("Duplicate key: {}".format(key,))
            else:
                my_dict[key] = values
        return my_dict
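
    A minimal usage sketch: the standard json module accepts this hook through the object_pairs_hook parameter of json.loads, which calls it with each object's key/value pairs.

    import json

    # parses normally when keys are unique
    print(json.loads('{"a": 1, "b": 2}', object_pairs_hook=dict_raise_on_duplicates))

    # raises ValueError: Duplicate key: a
    try:
        json.loads('{"a": 1, "a": 2}', object_pairs_hook=dict_raise_on_duplicates)
    except ValueError as err:
        print(err)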
    

    execution_time

    """
    ExecutionTime
    This class is used for timing execution of code.
    For example:
        timer = ExecutionTime()
        print 'Hello world!'
        print 'Finished in {} seconds.'.format(timer.duration())
    """
    
    
    import time
    import random
    
    
    class ExecutionTime:
        def __init__(self):
            self.start_time = time.time()
    
        def duration(self):
            return time.time() - self.start_time
    
    
    # ---- run code ---- #
    
    
    timer = ExecutionTime()
    my_list = [random.randint(1, 888898) for num in
               range(1, 1000000) if num % 2 == 0]
    print('Finished in {} seconds.'.format(timer.duration()))
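
    A note on clocks: time.time() follows the wall clock and can jump if the system time changes. A sketch of the same class built on time.perf_counter() (standard library since Python 3.3), which is monotonic and intended for interval timing:

    import time


    class ExecutionTime:
        def __init__(self):
            # monotonic clock: unaffected by system clock adjustments
            self.start_time = time.perf_counter()

        def duration(self):
            return time.perf_counter() - self.start_time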
    

    benchmark_permissions_loading_django

    import os
    import time
    import numpy
    
    # temp file for benchmarking
    
    
    def timeit(method):
    
        def timed(*args, **kw):
            ts = time.time()
    
            result = method(*args, **kw)
            te = time.time()
            all_times.append(te - ts)
    
            print(all_times)
            print(numpy.mean(all_times))
            return result
    
        return timed
    
    
    def create_new_db():
        os.system("mysqladmin -u root drop DATABASE_NAME -f")
        os.system("mysqladmin -u root create DATABASE_NAME -f")
        os.system("./manage.py syncdb")
        os.system("./manage.py migrate")
    
    
    @timeit
    def load_new_perms():
        os.system("./manage.py LOAD_PERMS_COMMAND")
    
    
    if __name__ == "__main__":
        n = 0
        all_times = list()
        while n < 10:
            create_new_db()
            load_new_perms()
            n += 1
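
    A small refinement sketch of the decorator above: functools.wraps preserves the decorated function's name and docstring, which the bare closure loses, and the per-call output can name the function. It still relies on the module-level all_times list defined in __main__ above.

    import functools
    import time

    import numpy


    def timeit(method):
        @functools.wraps(method)  # keep method.__name__ and __doc__
        def timed(*args, **kw):
            ts = time.time()
            result = method(*args, **kw)
            all_times.append(time.time() - ts)  # all_times: module-level list
            print("{}: mean of {} runs = {:.3f}s".format(
                method.__name__, len(all_times), numpy.mean(all_times)))
            return result
        return timed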
    

    basic_email_web_crawler

    import requests
    import re
    
    # get url
    url = input('Enter a URL (include `http://`): ')
    
    # connect to the url
    website = requests.get(url)
    
    # read html
    html = website.text
    
    # use re.findall to grab all the links
    links = re.findall('"((http|ftp)s?://.*?)"', html)
    emails = re.findall(r'([\w\.,]+@[\w\.,]+\.\w+)', html)
    
    
    # print the number of links found, then the emails
    print("\nFound {} links".format(len(links)))
    for email in emails:
        print(email)
    
    
    import requests
    import re
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin
    
    # regex
    link_re = re.compile(r'href="(.*?)"')
    
    
    def crawl(url):
    
        req = requests.get(url)
    
        # Check if successful
        if req.status_code != 200:
            return []
    
        # Find links
        links = link_re.findall(req.text)
    
        print("
    Found {} links".format(len(links)))
    
        # Search links for emails
        for link in links:
    
            # Get an absolute URL for a link
            link = urljoin(url, link)
    
            print(link)
    
    if __name__ == '__main__':
        crawl('http://www.realpython.com')
    

    find_files_recursively

    import fnmatch
    import os
    
    # constants
    PATH = './'
    PATTERN = '*.md'
    
    
    def get_file_names(filepath, pattern):
        matches = []
        if os.path.exists(filepath):
            for root, dirnames, filenames in os.walk(filepath):
                for filename in fnmatch.filter(filenames, pattern):
                    # matches.append(os.path.join(root, filename))  # full path
                    matches.append(filename)  # just the file name
            if matches:
                print("Found {} files:".format(len(matches)))
                output_files(matches)
            else:
                print("No files found.")
        else:
            print("Sorry that path does not exist. Try again.")
    
    
    def output_files(list_of_files):
        for filename in list_of_files:
            print(filename)
    
    
    if __name__ == '__main__':
        get_file_names(PATH, PATTERN)
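
    For comparison, a sketch of the same recursive search using pathlib (standard library since Python 3.4); Path.rglob walks subdirectories itself, so no explicit os.walk loop is needed:

    from pathlib import Path

    # recursively collect just the names of files matching *.md under ./
    matches = [p.name for p in Path('./').rglob('*.md') if p.is_file()]
    print("Found {} files:".format(len(matches)))
    for name in matches:
        print(name)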
    

    optimize_images_with_wand

    import fnmatch
    import os
    
    # pip install Wand
    from wand.image import Image
    # pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
    from hurry.filesize import size
    
    
    # constants
    PATH = '/../../../..'
    PATTERN = '*.jpg'
    
    
    def get_image_file_names(filepath, pattern):
        matches = []
        if os.path.exists(filepath):
            for root, dirnames, filenames in os.walk(filepath):
                for filename in fnmatch.filter(filenames, pattern):
                    matches.append(os.path.join(root, filename))  # full path
            if matches:
                print("Found {} files, with a total file size of {}.".format(
                    len(matches), get_total_size(matches)))
                return matches
            else:
                print("No files found.")
        else:
            print("Sorry that path does not exist. Try again.")
    
    
    def get_total_size(list_of_image_names):
        total_size = 0
        for image_name in list_of_image_names:
            total_size += os.path.getsize(image_name)
        return size(total_size)
    
    
    def resize_images(list_of_image_names):
        print("Optimizing ... ")
        for index, image_name in enumerate(list_of_image_names):
            with open(image_name, 'rb') as f:  # images must be read in binary mode
                image_binary = f.read()
            with Image(blob=image_binary) as img:
                if img.height >= 600:
                    img.transform(resize='x600')
                    img.save(filename=image_name)
        print("Optimization complete.")
    
    
    if __name__ == '__main__':
        all_images = get_image_file_names(PATH, PATTERN)
        resize_images(all_images)
        get_image_file_names(PATH, PATTERN)
    

    csv_split

    
    import sys
    import os
    import csv
    import argparse
    
    """
    Splits a CSV file into multiple files based on command line arguments.
        Arguments:
        `-h`: show usage help for the script
        `-i`: input file name
        `-o`: output file name
        `-r`: row limit to split at
        Default settings:
        `output_path` is the current directory
        headers are repeated in each split file
        the default delimiter is a comma
        Example usage:
        # split the csv every 100 rows
        >> python csv_split.py -i input.csv -o output -r 100
    """
    
    
    def get_arguments():
        """Grab user supplied arguments using the argparse library."""
    
        # Use argparse to get command line arguments
        parser = argparse.ArgumentParser()
        parser.add_argument("-i", "--input_file", required=True,
                            help="csv input file (with extension)", type=str)
        parser.add_argument("-o", "--output_file", required=True,
                            help="csv output file (without extension)", type=str)
        parser.add_argument("-r", "--row_limit", required=True,
                            help="row limit to split csv at", type=int)
        args = parser.parse_args()
    
        # Check that the input_file exists
        is_valid_file(parser, args.input_file)
    
        # Check if the input_file is valid
        is_valid_csv(parser, args.input_file, args.row_limit)
    
        return args.input_file, args.output_file, args.row_limit
    
    
    def is_valid_file(parser, file_name):
        """Ensure that the input_file exists."""
        if not os.path.exists(file_name):
            parser.error("The file '{}' does not exist!".format(file_name))
            sys.exit(1)
    
    
    def is_valid_csv(parser, file_name, row_limit):
        """
        Ensure that the # of rows in the input_file
        is greater than the row_limit.
        """
        row_count = 0
        with open(file_name) as input_csv:
            for row in csv.reader(input_csv):
                row_count += 1
        # Note: You could also use a generator expression
        # and the sum() function to count the rows:
        # row_count = sum(1 for row in csv.reader(input_csv))
        if row_limit > row_count:
            parser.error(
                "The 'row_count' of '{}' is > the number of rows in '{}'!"
                .format(row_limit, file_name)
            )
            sys.exit(1)
    
    
    def parse_file(arguments):
        """
        Splits the CSV into multiple files or chunks based on the row_limit.
        Then create new CSV files.
        """
        input_file = arguments[0]
        output_file = arguments[1]
        row_limit = arguments[2]
        output_path = '.'  # Current directory
    
        # Read CSV, split into list of lists
        with open(input_file, 'r') as input_csv:
            datareader = csv.reader(input_csv)
            all_rows = []
            for row in datareader:
                all_rows.append(row)
    
            # Remove header
            header = all_rows.pop(0)
    
            # Split list of list into chunks
            current_chunk = 1
            for i in range(0, len(all_rows), row_limit):  # Loop through list
                chunk = all_rows[i:i + row_limit]  # Create single chunk
    
                current_output = os.path.join(  # Create new output file
                    output_path,
                    "{}-{}.csv".format(output_file, current_chunk)
                )
    
                # Add header
                chunk.insert(0, header)
    
                # Write chunk to output file
            with open(current_output, 'w', newline='') as output_csv:
                writer = csv.writer(output_csv)
                writer.writerows(chunk)
    
                # Output info
                print("")
                print("Chunk # {}:".format(current_chunk))
                print("Filepath: {}".format(current_output))
                print("# of rows: {}".format(len(chunk)))
    
                # Create new chunk
                current_chunk += 1
    
    
    if __name__ == "__main__":
        arguments = get_arguments()
        parse_file(arguments)
    

    random_name_generator

    from random import randint
    
    
    def random_name_generator(first, second, x):
        """
            Generates random names.
            Arguments:
             - list of first names
             - list of last names
             - number of random names
        """
        names = []
        for i in range(0, int(x)):
            random_first = randint(0, len(first)-1)
            random_last = randint(0, len(second)-1)
            names.append("{0} {1}".format(
                first[random_first],
                second[random_last])
            )
        return set(names)
    
    
    first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"]
    last_names = ["Smith", "Jones", "Brighton", "Taylor"]
    names = random_name_generator(first_names, last_names, 5)
    print('\n'.join(names))
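
    Because the result is deduplicated with set(), the function can return fewer than x names. A sketch that keeps drawing until x unique names are collected (it assumes len(first) * len(second) >= x, or the loop never finishes):

    from random import choice


    def unique_random_names(first, second, x):
        names = set()
        while len(names) < int(x):
            # sets ignore duplicates, so only new names grow the set
            names.add("{} {}".format(choice(first), choice(second)))
        return names


    print('\n'.join(unique_random_names(first_names, last_names, 5)))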
    

    html_to_markdown

    # Convert all html files in a single directory to markdown
    #
    # 1. Install pandoc
    # 2. Run the script
     
     
     
    FILES=*.html
    for f in $FILES
    do
      # extension="${f##*.}"
      filename="${f%.*}"
      echo "Converting $f to $filename.md"
      pandoc "$f" -t markdown -o "../mds/$filename.md"
      # uncomment this line to delete the source file.
      # rm $f
    done
    

    check_my_environment

    """
    Pass in a config file based on your environment.
    Example:
    import check_my_environment
    class Main:
        def __init__(self, configFile):
            pass
        def process(self):
            print("ok")
    if __name__ == "__main__":
        m = Main(check_my_environment.CONFIGFILE)
        m.process()
    """
    
    
    import os
    import sys
    ENVIRONMENT = "development"
    CONFIGFILE = None
    
    
    def get_config_file():
        directory = os.path.dirname(__file__)
        return {
            "development": "{}/../config/development.cfg".format(directory),
            "staging": "{}/../config/staging.cfg".format(directory),
            "production": "{}/../config/production.cfg".format(directory)
        }.get(ENVIRONMENT, None)
    
    CONFIGFILE = get_config_file()
    
    if CONFIGFILE is None:
        sys.exit("Configuration error! Unknown environment set. "
                 "Edit config.py and set the appropriate environment.")
    print("Config file: {}".format(CONFIGFILE))
    if not os.path.exists(CONFIGFILE):
        sys.exit("Configuration error! Config file does not exist")
    print("Config ok ....")
    

    jinja_quick_load

    """
    Render a quick Jinja2 template.
    Thanks Danny - http://pydanny.com/jinja2-quick-load-function.html
    Example:
    >>> from jinja_quick_load import render_from_template
    >>> data = {
    ...     "date": "June 12, 2014",
    ...     "items": ["oranges", "bananas", "steak", "milk"]
    ... }
    >>> render_from_template(".", "shopping_list.html", **data)
    """
    
    
    from jinja2 import FileSystemLoader, Environment
    
    
    def render_from_template(directory, template_name, **kwargs):
        loader = FileSystemLoader(directory)
        env = Environment(loader=loader)
        template = env.get_template(template_name)
        return template.render(**kwargs)
    

    rewrite_git_history

    I always forget how to backdate commits, so here we go ...

    This is dangerous and should be signed off by the omniscient, omnipotent Git deity itself. Rewriting history is evil, in other words.

    $ git add <file_name>
    $ export GIT_COMMITTER_DATE="Sun Jun 15 14:00 2014 +0100"
    $ export GIT_AUTHOR_DATE="Sun Jun 15 14:00 2014 +0100"
    $ git commit -m "so bad"
    $ git push
    

    GIT_COMMITTER_DATE and GIT_AUTHOR_DATE are environment variables; note the double "t" in COMMITTER, or git will silently ignore the variable.
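
    The same backdated commit as a one-liner, so the variables apply only to that single command (git commit also accepts --date for the author date):

    GIT_COMMITTER_DATE="Sun Jun 15 14:00 2014 +0100" GIT_AUTHOR_DATE="Sun Jun 15 14:00 2014 +0100" git commit -m "so bad"
    # or, for the author date only:
    git commit --date="Sun Jun 15 14:00 2014 +0100" -m "so bad"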

    zipper

    import os
    from datetime import datetime
    from zipfile import ZipFile
    
    
    # set file name and time of creation
    today = datetime.now()
    file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip'
    dir_name = 'tmp/'  # update path
    
    
    def zipdir(path, zipf):
        # walk the tree and add every file to the archive
        for root, dirs, files in os.walk(path):
            for file in files:
                zipf.write(os.path.join(root, file))

    if __name__ == '__main__':
        with ZipFile(file_name, 'w') as zipf:
            zipdir(dir_name, zipf)
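
    One caveat: ZipFile.write() stores each entry under the full path it was given. A variant sketch (with file_name and dir_name as defined above) that stores paths relative to the zipped directory, using write()'s documented arcname parameter:

    import os
    from zipfile import ZipFile


    def zipdir_relative(path, zipf):
        for root, dirs, files in os.walk(path):
            for file in files:
                full_path = os.path.join(root, file)
                # store the entry relative to the zipped directory
                zipf.write(full_path, arcname=os.path.relpath(full_path, path))


    if __name__ == '__main__':
        with ZipFile(file_name, 'w') as zipf:
            zipdir_relative(dir_name, zipf)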
    

    Search all files under the current directory for a string

    find . -name "*.py" | xargs cat | grep org.csv
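
    An equivalent that also reports the matching file and line number, using GNU grep's recursive mode:

    grep -rn --include="*.py" "org.csv" .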
    

    Linux: mount a Samba share to sync files

    # sudo apt-get install cifs-utils
    sudo mount -t cifs //IP/share $(pwd)/share -o username=username

    # /etc/fstab entry for mounting at boot:
    //192.168.3.145/username /home/username/dev/ cifs defaults,username=username,password=password,uid=uid,gid=gid

    mount -t cifs //60.205.230.226/share $(pwd)/share -o username=xxxxxxxx,password=xxxxxxx

    sudo mount -t cifs //192.168.0.103/Public /mnt/samba/ -o guest

    mount -t smbfs -o codepage=cp936,username=USER,password=PASSWORD -l //IP/SHARE MOUNTPOINT
    # or
    mount -t smbfs -o codepage=cp936,username=USER,password=PASSWORD -l //HOSTNAME/SHARE MOUNTPOINT
    # If no username or password is set, this can be simplified to:
    mount -t smbfs -o codepage=cp936 //IP_OR_HOSTNAME/SHARE MOUNTPOINT
    

    Find the process using a given port

    lsof -Pnl +M -i4 | grep 8010

    # On Linux, find the PID using a port with lsof:
    lsof -i:PORT
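
    On newer distributions where net-tools is not installed, ss (from iproute2) gives the same information:

    sudo ss -ltnp | grep :8010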
    

    Ubuntu: check which ports a process is using

    sudo netstat -anp | grep PID
    

    Linux: log in over ssh with the password on the command line

    sudo apt-get install sshpass
    sshpass -p '12345678' ssh androidyue@10.0.5.10
      Note: the single quotes above are required; double quotes will not work.
    

    Non-interactive sudo execution

    echo password | sudo -S ls
    

    Running ssh commands on a remote host

    ssh centos@192.168.202.205 << AAA
        ls
        exit
    AAA
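
    A related pattern: to run a longer local script on the remote host instead of an inline heredoc, feed it to a remote shell over stdin (deploy.sh is a hypothetical local file):

    ssh centos@192.168.202.205 'bash -s' < deploy.sh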
    
  • Original article: https://www.cnblogs.com/bergus/p/pythonscripts.html