zoukankan      html  css  js  c++  java
  • multiple delimiters

    Version using the re module

    import re
    
    
    # Sample input: tokens separated by commas, semicolons, spaces and
    # newlines, with "ace" appearing twice.
    string_out = """ace, bda; des; edf;fsa; gas;   ace
    b
    e"""
    
    
    
    def split_re(origin_string='', separators=','):
        """Split ``origin_string`` on any single character in ``separators``.

        Every character of ``separators`` is treated as a literal
        one-character delimiter.  Empty pieces are dropped and repeated
        pieces are kept only at their first occurrence, so the result
        preserves the order in which pieces first appear.

        :param origin_string: text to split.
        :param separators: string whose characters are the delimiters.
        :return: list of unique, non-empty pieces in first-seen order.
        """
        if not separators:
            # "[]" is not a valid character class; with no delimiters the
            # whole string is the only possible piece.
            return [origin_string] if origin_string else []

        # Escape the delimiters so that characters which are special inside
        # a character class ("^", "]", "-", backslash) are matched literally
        # instead of negating the class, forming a range or raising re.error.
        pattern = r'[%s]' % re.escape(separators)
        origin_list = re.split(pattern, origin_string)

        # To keep the delimiters in the result as well, wrap the class in a
        # capture group: re.split(r'(%s)' % pattern, origin_string)

        # Drop empty pieces and repeats; the set makes the membership test
        # O(1) while the list keeps first-seen order.
        total_list = []
        seen = set()
        for data in origin_list:
            if data != '' and data not in seen:
                seen.add(data)
                total_list.append(data)

        return total_list
    
    
    # The newline delimiter must be written as the escape "\n": a raw line
    # break inside a single-quoted literal is a syntax error.
    result = ','.join(split_re(string_out, '\n,+;;,、 '))

    print(result)
    
    

    My version (str.split only, no re)

    # Sample input: tokens separated by commas, semicolons, spaces and
    # newlines, with "ace" appearing twice.
    string_out = """ace, bda; des; edf;fsa; gas;   ace
    b
    e"""
    
    
    def split_simple(origin_string='', separators=','):
        """Split ``origin_string`` on each character of ``separators`` in turn.

        The text is split on the first separator, every resulting piece is
        stripped of surrounding whitespace and split again on the next
        separator, and so on.  Empty pieces are dropped and repeated pieces
        are kept only at their first occurrence.

        :param origin_string: text to split.
        :param separators: iterable of delimiter strings (typically a string
            whose characters are the delimiters).
        :return: list of unique, non-empty pieces in first-seen order.
        """
        origin_list = [origin_string]

        # Apply one separator per pass: every piece produced so far is split
        # again on the next separator and stripped.
        for sep in separators:
            origin_list = [
                piece.strip()
                for chunk in origin_list
                for piece in chunk.split(sep)
            ]

        # Drop empty pieces and repeats; the set makes the membership test
        # O(1) while the list keeps first-seen order.
        total_list = []
        seen = set()
        for data in origin_list:
            if data != '' and data not in seen:
                seen.add(data)
                total_list.append(data)

        return total_list
    
    
    # The newline delimiter must be written as the escape "\n": a raw line
    # break inside a single-quoted literal is a syntax error.
    result = ','.join(split_simple(string_out, '\n,+;;,、 '))

    print(result)
    
    
    

    Python 3 version

    from functools import reduce
    
    
    def split_by_separator(origin_string='', separators=','):
        """Split ``origin_string`` on each character of ``separators`` in turn.

        After every split the pieces are stripped of surrounding whitespace
        before being split on the next separator.  Empty pieces are dropped
        and repeated pieces are kept only at their first occurrence.

        :param origin_string: text to split.
        :param separators: iterable of delimiter strings (typically a string
            whose characters are the delimiters).
        :return: list of unique, non-empty pieces in first-seen order.
        """
        origin_list = [origin_string]
        for sep in separators:
            # Re-split every piece produced so far on the current separator.
            origin_list = [
                piece.strip()
                for chunk in origin_list
                for piece in chunk.split(sep)
            ]

        # Drop empty pieces and repeats; the set makes the membership test
        # O(1) while the list keeps first-seen order.
        unique = []
        seen = set()
        for data in origin_list:
            if data != '' and data not in seen:
                seen.add(data)
                unique.append(data)
        return unique
    
    
    # Newlines inside single-quoted literals must be written as the escape
    # "\n": a raw line break there is a syntax error.
    string_out = ' ;vickey; hello; world; hey;how; are; \na、b,cd'

    result = ','.join(split_by_separator(string_out, '\n,+;;,、'))

    print(result)
    
    

    Python 2 version

    #!/usr/bin/env python
    # _*_ coding: utf-8 _*_
    # @Time     : 2017/3/9 19:52
    # @Author   : otfsenter
    # @File     : a.py
    
    #coding:utf-8
    
    # Sample input: tokens separated by newlines, commas and "+", with
    # several tokens repeated.
    result = '''
    sdf-asd
    sdf-asd01
    sdf-asd02
    sdf-asd,sdf-asd01 ,sdf-asd02
    aui+otfsenter+which
    '''
    
    # result = ''
    # with open('tmp.txt', 'r') as f:
    #     for i in f:
    #         result += i
    #
    # print result
    
    def split_by_separator(string='', separators=','):
        """Split ``string`` on each character of ``separators`` in turn.

        After every split the pieces are stripped of surrounding whitespace
        before being split on the next separator.  Empty pieces are dropped
        and repeated pieces are kept only at their first occurrence.

        Uses no builtin ``reduce``, so it runs unchanged on Python 2 and 3.

        :param string: text to split.
        :param separators: iterable of delimiter strings (typically a string
            whose characters are the delimiters).
        :return: list of unique, non-empty pieces in first-seen order.
        """
        rst = [string]
        for sep in separators:
            # Re-split every piece produced so far on the current separator.
            rst = [piece.strip() for chunk in rst for piece in chunk.split(sep)]

        # Drop empty pieces and repeats; the set makes the membership test
        # O(1) while the list keeps first-seen order.
        unique = []
        seen = set()
        for data in rst:
            if data != '' and data not in seen:
                seen.add(data)
                unique.append(data)
        return unique
    
    # The newline delimiter must be written as the escape "\n": a raw line
    # break inside a single-quoted literal is a syntax error.  The
    # parenthesised single-argument print is valid on Python 2 and 3.
    print(split_by_separator(result, '\n,+'))
    
    
  • 相关阅读:
    SDUST OJ 时间类的加、减法赋值运算
    POJ 2823 (滑动窗口)
    POJ 2229 计数DP
    POJ 1995 (快速幂)
    poj 3009 (深搜求最短路)
    C++ 学习笔记之 STL 队列
    C++ 学习笔记之 引用
    Anaconda3使用
    Ubuntu 18.04安装Conda、Jupyter Notebook、Anaconda
    Ubuntu 18.04安装 pyenv、pyenv-virtualenv、virtualenv、Numpy、SciPy、Pillow、Matplotlib
  • 原文地址:https://www.cnblogs.com/otfsenter/p/6544416.html
Copyright © 2011-2022 走看看