zoukankan      html  css  js  c++  java
  • python标准库学习7

    使用 os.path 模块处理文件名

    import os
    
    filename = "my/little/pony"
    
    print "using", os.name, "..."
    print "split", "=>", os.path.split(filename)
    print "splitext", "=>", os.path.splitext(filename)
    print "dirname", "=>", os.path.dirname(filename)
    print "basename", "=>", os.path.basename(filename)
    print "join", "=>", os.path.join(os.path.dirname(filename),
                                     os.path.basename(filename))
    
    using nt ...
    split => ('my/little', 'pony')
    splitext => ('my/little/pony', '')
    dirname => my/little
    basename => pony
    join => my/little\pony
    

      当前目录和上一级目录

    >>> os.pardir
    '..'
    >>> os.curdir
    '.'
    

      

    使用 os.path 模块检查文件名的特征

    import os
    
    FILES = (
        os.curdir,
        "/",
        "file",
        "/file",
        "samples",
        "samples/sample.jpg",
        "directory/file",
        "../directory/file",
        "/directory/file"
        )
    
    for file in FILES:
        print file, "=>",
        if os.path.exists(file):
            print "EXISTS",
        if os.path.isabs(file):
            print "ISABS",
        if os.path.isdir(file):
            print "ISDIR",
        if os.path.isfile(file):
            print "ISFILE",
        if os.path.islink(file):
            print "ISLINK",
        if os.path.ismount(file):
            print "ISMOUNT",
        print
    
    . => EXISTS ISDIR
    / => EXISTS ISABS ISDIR ISMOUNT
    file =>
    /file => ISABS
    samples => EXISTS ISDIR
    samples/sample.jpg => EXISTS ISFILE
    directory/file =>
    ../directory/file =>
    /directory/file => ISABS
    

      expanduser 函数以与大部分 Unix shell 相同的方式处理用户名快捷符号(~, 不过在 Windows 下工作不正常).

    使用 os.path 模块将用户名插入到文件名

    import os
    
    print os.path.expanduser("~/.pythonrc")
    
    # /home/effbot/.pythonrc
    

      expandvars 函数将文件名中的环境变量替换为对应值

    使用 os.path 替换文件名中的环境变量

    import os
    
    os.environ["USER"] = "user"
    
    print os.path.expandvars("/home/$USER/config")
    print os.path.expandvars("$USER/folders")
    
    /home/user/config
    user/folders
    

      列出目录下所有的文件和目录

    >>> a=[file for file in os.listdir("d:\\new")]
    >>> for i in a:
    	print i
    

      walk 函数会帮你找出一个目录树下的所有文件. 它的参数依次是目录名, 回调函数, 以及传递给回调函数的数据对象.

    使用 os.path 搜索文件系统

    import os
    
    def callback(arg, directory, files):
        for file in files:
            print os.path.join(directory, file), repr(arg)
    
    os.path.walk(".", callback, "secret message")
    
    ./aifc-example-1.py 'secret message'
    ./anydbm-example-1.py 'secret message'
    ./array-example-1.py 'secret message'
    ...
    ./samples 'secret message'
    ./samples/sample.jpg 'secret message'
    ./samples/sample.txt 'secret message'
    ./samples/sample.zip 'secret message'
    ./samples/articles 'secret message'
    ./samples/articles/article-1.txt 'secret message'
    ./samples/articles/article-2.txt 'secret message'
    ...
    

      index 函数会返回一个文件名列表, 你可以直接使用for-in 循环处理文件.

    使用 os.listdir 搜索文件系统

    import os
    
    def index(directory):
        """Return the paths of everything below ``directory``.

        Works like os.listdir, but descends into subdirectories as well.
        Every entry (file or directory) is reported as a path joined to the
        directory it was found in.  Symbolic links to directories are listed
        but not followed.
        """
        pending = [directory]   # directories still waiting to be listed
        found = []
        while pending:
            current = pending.pop()
            for name in os.listdir(current):
                path = os.path.join(current, name)
                found.append(path)
                # only real (non-link) directories are scheduled for a visit
                if os.path.islink(path) or not os.path.isdir(path):
                    continue
                pending.append(path)
        return found
    
    for file in index("."):
        print file
    
    .\aifc-example-1.py
    .\anydbm-example-1.py
    .\array-example-1.py
    ...
    

      一次返回一个文件

    import os
    
    class DirectoryWalker:
        """Forward iterator that traverses a directory tree.

        Iteration relies on the legacy sequence protocol: a ``for`` loop
        calls ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.
        The index argument itself is ignored, so an instance supports a
        single front-to-back pass only; random access does not work.
        """

        def __init__(self, directory):
            # directories that still have to be listed (used as a LIFO stack)
            self.stack = [directory]
            # entries of the directory currently being scanned
            self.files = []
            # position of the next entry to hand out from self.files
            self.index = 0

        def __getitem__(self, index):
            """Return the next path in the tree; ``index`` is ignored.

            Raises IndexError once all directories are exhausted (the pop
            from the empty stack is what raises), which ends a ``for`` loop.
            """
            while 1:
                try:
                    name = self.files[self.index]
                    self.index = self.index + 1
                except IndexError:
                    # current directory exhausted: pop next one from stack;
                    # an empty stack lets IndexError propagate to the caller
                    self.directory = self.stack.pop()
                    self.files = os.listdir(self.directory)
                    self.index = 0
                else:
                    # got a filename
                    fullname = os.path.join(self.directory, name)
                    # schedule real subdirectories; do not follow links
                    if os.path.isdir(fullname) and not os.path.islink(fullname):
                        self.stack.append(fullname)
                    return fullname
    
    for file in DirectoryWalker("."):
        print file
    
    .\aifc-example-1.py
    .\anydbm-example-1.py
    .\array-example-1.py
    ...
    

      注意 DirectoryWalker 类并不检查传递给 __getitem__ 方法的索引值. 这意味着如果你越界访问序列成员(索引数字过大)的话, 这个类将不能正常工作.

    下面这个例子它返回文件名和它的 os.stat 属性(一个元组). 这个版本在每个文件上都能节省一次或两次stat 调用( os.path.isdir 和 os.path.islink 内部都使用了 stat ), 并且在一些平台上运行很快.

    使用 DirectoryStatWalker 搜索文件系统

    import os, stat
    
    class DirectoryStatWalker:
        """Forward iterator over a directory tree yielding (path, stat) pairs.

        Like a plain directory walker, but every item is a tuple of the
        full path and the stat result for that path, so callers do not need
        to stat the file again.

        Iteration relies on the legacy sequence protocol: a ``for`` loop
        calls ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.
        The index argument is ignored, so only a single front-to-back pass
        is supported.
        """

        def __init__(self, directory):
            # directories that still have to be listed (used as a LIFO stack)
            self.stack = [directory]
            # entries of the directory currently being scanned
            self.files = []
            # position of the next entry to hand out from self.files
            self.index = 0

        def __getitem__(self, index):
            """Return the next ``(fullname, stat_result)``; ``index`` is ignored.

            Raises IndexError once all directories are exhausted (the pop
            from the empty stack is what raises), which ends a ``for`` loop.
            """
            while 1:
                try:
                    name = self.files[self.index]
                    self.index = self.index + 1
                except IndexError:
                    # current directory exhausted: pop next one from stack;
                    # an empty stack lets IndexError propagate to the caller
                    self.directory = self.stack.pop()
                    self.files = os.listdir(self.directory)
                    self.index = 0
                else:
                    # got a filename
                    fullname = os.path.join(self.directory, name)
                    # lstat, not stat: stat follows symbolic links, so a link
                    # to a directory would look like a real directory and be
                    # descended into (possibly forever), and a dangling link
                    # would raise.  lstat describes the link itself.
                    st = os.lstat(fullname)
                    mode = st[stat.ST_MODE]
                    # a link's lstat mode is never S_ISDIR, so this only
                    # schedules real directories
                    if stat.S_ISDIR(mode):
                        self.stack.append(fullname)
                    return fullname, st
    
    for file, st in DirectoryStatWalker("."):
        print file, st[stat.ST_SIZE]
    
    .\aifc-example-1.py 336
    .\anydbm-example-1.py 244
    .\array-example-1.py 526
    

      

    Using the stat Module

    import stat
    import os, time
    
    st = os.stat("samples/sample.txt")
    
    print "mode", "=>", oct(stat.S_IMODE(st[stat.ST_MODE]))
    
    print "type", "=>",
    if stat.S_ISDIR(st[stat.ST_MODE]):
        print "DIRECTORY",
    if stat.S_ISREG(st[stat.ST_MODE]):
        print "REGULAR",
    if stat.S_ISLNK(st[stat.ST_MODE]):
        print "LINK",
    print
    
    print "size", "=>", st[stat.ST_SIZE]
    
    print "last accessed", "=>", time.ctime(st[stat.ST_ATIME])
    print "last modified", "=>", time.ctime(st[stat.ST_MTIME])
    print "inode changed", "=>", time.ctime(st[stat.ST_CTIME])
    
    mode => 0664
    type => REGULAR
    size => 305
    last accessed => Sun Oct 10 22:12:30 1999
    last modified => Sun Oct 10 18:39:37 1999
    inode changed => Sun Oct 10 15:26:38 1999
    

      

    使用 string 模块

    import string
    
    text = "Monty Python's Flying Circus"
    
    print "upper", "=>", string.upper(text)
    print "lower", "=>", string.lower(text)
    print "split", "=>", string.split(text)
    print "join", "=>", string.join(string.split(text), "+")
    print "replace", "=>", string.replace(text, "Python", "Java")
    print "find", "=>", string.find(text, "Python"), string.find(text, "Java")
    print "count", "=>", string.count(text, "n")
    
    upper => MONTY PYTHON'S FLYING CIRCUS
    lower => monty python's flying circus
    split => ['Monty', "Python's", 'Flying', 'Circus']
    join => Monty+Python's+Flying+Circus
    replace => Monty Java's Flying Circus
    find => 6 -1
    count => 3
    

      

    使用字符串方法替代 string 模块函数

    text = "Monty Python's Flying Circus"
    
    print "upper", "=>", text.upper()
    print "lower", "=>", text.lower()
    print "split", "=>", text.split()
    print "join", "=>", "+".join(text.split())
    print "replace", "=>", text.replace("Python", "Perl")
    print "find", "=>", text.find("Python"), text.find("Perl")
    print "count", "=>", text.count("n")
    
    upper => MONTY PYTHON'S FLYING CIRCUS
    lower => monty python's flying circus
    split => ['Monty', "Python's", 'Flying', 'Circus']
    join => Monty+Python's+Flying+Circus
    replace => Monty Perl's Flying Circus
    find => 6 -1
    count => 3
    

      

    使用 string 模块将字符串转为数字

    import string
    
    print int("4711"),
    print string.atoi("4711"),
    print string.atoi("11147", 8), # octal 八进制
    print string.atoi("1267", 16), # hexadecimal 十六进制
    print string.atoi("3mv", 36) # whatever...
    
    print string.atoi("4711", 0),
    print string.atoi("04711", 0),
    print string.atoi("0x4711", 0)
    
    print float("4711"),
    print string.atof("1"),
    print string.atof("1.23e5")
    
    4711 4711 4711 4711 4711
    4711 2505 18193
    4711.0 1.0 123000.0
    

      operator 模块为 Python 提供了一个 "功能性" 的标准操作符接口. 当使用 map 以及 filter 一类的函数的时候, operator 模块中的函数可以替换一些lambda 函式. 而且这些函数在一些喜欢写晦涩代码的程序员中很流行.

    使用 operator 模块

    print "add", "=>", reduce(operator.add, sequence)
    print "sub", "=>", reduce(operator.sub, sequence)
    print "mul", "=>", reduce(operator.mul, sequence)
    print "concat", "=>", operator.concat("spam", "egg")
    print "repeat", "=>", operator.repeat("spam", 5)
    print "getitem", "=>", operator.getitem(sequence, 2)
    print "indexOf", "=>", operator.indexOf(sequence, 2)
    print "sequenceIncludes", "=>", operator.sequenceIncludes(sequence, 3)
    
    add => 7
    sub => -5
    mul => 8
    concat => spamegg
    repeat => spamspamspamspamspam
    
    getitem => 4
    indexOf => 1
    sequenceIncludes => 0
    

      

    使用 operator 模块检查类型

    import operator
    import UserList
    
    def dump(data):
        """Print the type of ``data`` and the categories it belongs to.

        Writes one line: ``type(data)``, ``=>``, then one tag for every
        operator-module predicate that is true for the object (CALLABLE,
        MAPPING, NUMBER, SEQUENCE).  Nothing is returned.
        """
        print type(data), "=>",
        if operator.isCallable(data):
            print "CALLABLE",
        if operator.isMappingType(data):
            print "MAPPING",
        if operator.isNumberType(data):
            print "NUMBER",
        if operator.isSequenceType(data):
            print "SEQUENCE",
        print
            
    dump(0)
    dump("string")
    dump("string"[0])
    dump([1, 2, 3])
    dump((1, 2, 3))
    dump({"a": 1})
    dump(len) # function 函数
    dump(UserList) # module 模块
    dump(UserList.UserList) # class 类
    dump(UserList.UserList()) # instance 实例
    
    <type 'int'> => NUMBER
    <type 'string'> => SEQUENCE
    <type 'string'> => SEQUENCE
    <type 'list'> => SEQUENCE
    <type 'tuple'> => SEQUENCE
    <type 'dictionary'> => MAPPING
    <type 'builtin_function_or_method'> => CALLABLE
    <type 'module'> =>
    <type 'class'> => CALLABLE
    <type 'instance'> => MAPPING NUMBER SEQUENCE
    

      copy 模块包含两个函数, 用来拷贝对象

    使用 copy 模块复制对象

    import copy
    
    a = [[1],[2],[3]]
    b = copy.copy(a)
    
    print "before", "=>"
    print a
    print b
    
    # modify original
    a[0][0] = 0
    a[1] = None
    
    print "after", "=>"
    print a
    print b
    
    before =>
    [[1], [2], [3]]
    [[1], [2], [3]]
    after =>
    [[0], None, [3]]
    [[0], [2], [3]]
    

      

    使用 copy 模块复制集合(Collections)

    import copy
    
    a = [[1],[2],[3]]
    b = copy.deepcopy(a)
    
    print "before", "=>"
    print a
    print b
    
    # modify original
    a[0][0] = 0
    a[1] = None
    
    print "after", "=>"
    print a
    print b
    
    before =>
    [[1], [2], [3]]
    [[1], [2], [3]]
    after =>
    [[0], None, [3]]
    [[1], [2], [3]]
    

     使用sys模块获得脚本的参数

    import sys
    
    print "script name is", sys.argv[0]
    
    if len(sys.argv) > 1:
        print "there are", len(sys.argv)-1, "arguments:"
        for arg in sys.argv[1:]:
            print arg
    else:
        print "there are no arguments!"
    
    script name is sys-argv-example-1.py
    there are no arguments!
    

      

    使用sys模块操作模块搜索路径

    import sys
    
    print "path has", len(sys.path), "members"
    
    # add the sample directory to the path
    sys.path.insert(0, "samples")
    import sample
    
    # nuke the path
    sys.path = []
    import random # oops!
    
    path has 7 members
    this is the sample module!
    Traceback (innermost last):
      File "sys-path-example-1.py", line 11, in ?
        import random # oops!
    ImportError: No module named random
    

      

    使用sys模块查找内建模块

    import sys
    
    def dump(module):
        print module, "=>",
        if module in sys.builtin_module_names:
            print "<BUILTIN>"
        else:
            module = _ _import_ _(module)
            print module._ _file_ _
    
    dump("os")
    dump("sys")
    dump("string")
    dump("strop")
    dump("zlib")
    
    os => C:\python\lib\os.pyc
    sys => <BUILTIN>
    string => C:\python\lib\string.pyc
    strop => <BUILTIN>
    zlib => C:\python\zlib.pyd
    

      

    使用sys模块查找已导入的模块

    modules 字典包含所有加载的模块. import 语句在从磁盘导入内容之前会先检查这个字典.

    import sys
    
    print sys.modules.keys()
    
    ['os.path', 'os', 'exceptions', '__main__', 'ntpath', 'strop', 'nt',
    'sys', '__builtin__', 'site', 'signal', 'UserDict', 'string', 'stat']
    

      getrefcount 函数返回给定对象的引用计数 - 也就是这个对象被引用的次数. Python 会跟踪这个值, 当它减少为0的时候, 就销毁这个对象.

    使用sys模块获得引用记数

    import sys
    
    variable = 1234
    
    print sys.getrefcount(0)
    print sys.getrefcount(variable)
    print sys.getrefcount(None)
    
    50
    3
    192
    

      注意这个值总是比实际的数量大, 因为该函数本身在确定这个值的时候依赖这个对象

    使用sys模块获得当前平台

    import sys
    
    #
    # emulate "import os.path" (sort of)...
    
    if sys.platform == "win32":
        import ntpath
        pathmodule = ntpath
    elif sys.platform == "mac":
        import macpath
        pathmodule = macpath
    else:
        # assume it's a posix platform
        import posixpath
        pathmodule = posixpath
    
    print pathmodule
    

      setprofile 函数允许你配置一个分析函数(profiling function). 这个函数会在每次调用某个函数或方法时被调用(明确或隐含的), 或是遇到异常的时候被调用.

    使用sys模块配置分析函数

    import sys
    
    def test(n):
        j = 0
        for i in range(n):
            j = j + i
        return n
    
    def profiler(frame, event, arg):
        print event, frame.f_code.co_name, frame.f_lineno, "->", arg
    
    # profiler is activated on the next call, return, or exception
    # 分析函数将在下次函数调用, 返回, 或异常时激活
    sys.setprofile(profiler)
    
    # profile this function call
    # 分析这次函数调用
    test(1)
    
    # disable profiler
    # 禁用分析函数
    sys.setprofile(None)
    
    # don't profile this call
    # 不会分析这次函数调用
    test(2)
    
    call test 3 -> None
    return test 7 -> 1
    

      

    使用sys模块配置单步跟踪函数

    import sys
    
    def test(n):
        j = 0
        for i in range(n):
            j = j + i
        return n
    
    def tracer(frame, event, arg):
        print event, frame.f_code.co_name, frame.f_lineno, "->", arg
        return tracer
    
    # tracer is activated on the next call, return, or exception
    # 跟踪器将在下次函数调用, 返回, 或异常时激活
    sys.settrace(tracer)
    
    # trace this function call
    # 跟踪这次函数调用
    test(1)
    
    # disable tracing
    # 禁用跟踪器
    sys.settrace(None)
    
    # don't trace this call
    # 不会跟踪这次函数调用
    test(2)
    
    call test 3 -> None
    line test 3 -> None
    line test 4 -> None
    line test 5 -> None
    line test 5 -> None
    line test 6 -> None
    line test 5 -> None
    line test 7 -> None
    return test 7 -> 1
    

      










  • 相关阅读:
    12306-单
    12306-票
    12306-车
    12306-人
    12306-目录
    a+=b 是什么意思?
    开启hadoop和Hbase集群的lzo压缩功能(转)
    HBase性能优化方法总结(转)
    Spark向HDFS中存储数据
    Spark从HDFS上读取JSON数据
  • 原文地址:https://www.cnblogs.com/rollenholt/p/2264682.html
Copyright © 2011-2022 走看看