zoukankan      html  css  js  c++  java
  • 正则表达式-汉字的匹配方法

    unicode :   ([\u4e00-\u9fa5]+)

    unicode :  ([\u2E80-\u9FFF]+)

    utf-8  :  ([\x80-\xff]+)

     # encoding: utf-8
import re


def _show_match(pattern_str, text, encoding=None):
    """Search *text* with *pattern_str* and print the match and its group 1.

    Prints ``None`` twice when the pattern does not match. When *encoding*
    is given, the matched byte strings are decoded with it before printing.
    A separator consisting of a newline character is printed afterwards.
    """
    m = re.compile(pattern_str).search(text)
    results = (m.group(), m.group(1)) if m is not None else (None, None)
    for found in results:
        if found is not None and encoding is not None:
            found = found.decode(encoding)
        print(found)
    print('\n')


def main():
    """Demonstrate three regular expressions for matching Chinese characters.

    Each pattern captures a run of Chinese characters surrounded by digits.
    The escapes are deliberately written in non-raw literals so that the
    string/bytes literal itself produces the range endpoints.
    """
    sample = u'ab123汉字123'

    # ([\u4e00-\u9fa5]+): code points U+4E00..U+9FA5 on unicode text.
    cjk_pattern = u'[0-9]+([\u4e00-\u9fa5]+)[0-9]+'
    _show_match(cjk_pattern, u'ab123kk123')  # ASCII letters only: no match
    _show_match(cjk_pattern, sample)

    # ([\x80-\xff]+): UTF-8 encoded bytes. This matches any run of non-ASCII
    # bytes, so both the pattern and the subject must be byte strings.
    utf8_pattern = b'[0-9]+([\x80-\xff]+)[0-9]+'
    _show_match(utf8_pattern, sample.encode('utf-8'), encoding='utf-8')

    # ([\u2E80-\u9FFF]+): wider range, U+2E80..U+9FFF, on unicode text.
    wide_pattern = u'[0-9]+([\u2E80-\u9FFF]+)[0-9]+'
    _show_match(wide_pattern, sample)


if __name__ == '__main__':
    main()
  • 相关阅读:
    同步ajax请求
    Thinking in Java——笔记(11)
    Thinking in Java——笔记(10)
    Thinking in Java——笔记(9)
    Thinking in Java——笔记(8)
    Thinking in Java——笔记(7)
    Thinking in Java——笔记(6)
    Thinking in Java——笔记(5)
    Thinking in Java——笔记(4)
    Thinking in Java——笔记(3)
  • 原文地址:https://www.cnblogs.com/mmix2009/p/3220456.html
Copyright © 2011-2022 走看看