zoukankan      html  css  js  c++  java
  • 正则表达式

    所有对于正则表达式的操作位于java.util.regex包下。

    两个重要的类:Matcher  Pattern

    package com.anllin.regex;

     

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Shows the difference between {@code Matcher.find()} and
     * {@code Matcher.find(int)} when scanning a sentence for word tokens.
     */
    public class Matches
    {
        /**
         * Prints every {@code \w+} token once (one per line), then prints the
         * token found when the search is restarted from each successive index.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            Pattern wordPattern = Pattern.compile("\\w+");
            Matcher words = wordPattern
                    .matcher("this is a regex test, the fisrt program");

            // Pass 1: find() continues from the end of the previous match.
            for (boolean found = words.find(); found; found = words.find())
            {
                System.out.println("[" + words.group() + "]");
            }

            System.out.println("---------------------------");

            // Pass 2: find(int) resets the matcher and searches from the given
            // index, so the same word is reported again with its head trimmed
            // as the offset walks through it.
            for (int offset = 0; words.find(offset); offset++)
            {
                System.out.print("[" + words.group() + "]");
            }
        }
    }

     

    输出结果 

    [this]

    [is]

    [a]

    [regex]

    [test]

    [the]

    [fisrt]

    [program]

    ---------------------------

    [this][his][is][s][is][is][s][a][a][regex][regex][egex][gex][ex][x][test][test][est][st][t][the][the][the][he][e][fisrt][fisrt][isrt][srt][rt][t][program][program][rogram][ogram][gram][ram][am][m]

     

    分组

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Demonstrates capturing groups: for every match of a three-word window,
     * prints the whole match (group 0) followed by each capturing group.
     */
    public class Group
    {
        /**
         * Scans the sample sentence with a pattern that has four capturing
         * groups and prints groups 0 through 4 of every match, one per line.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            String statement = "this is a test about the regex group, the method groupCount is used";

            Matcher m = Pattern.compile("(?m)(\\S+)\\s+((\\S+)\\s+(\\S+))")
                    .matcher(statement);

            while (m.find())
            {
                // groupCount() does not count group 0 (the whole match), so the
                // valid indices are 0..groupCount() inclusive. Using '<' here
                // would silently skip the last capturing group.
                for (int i = 0; i <= m.groupCount(); i++)
                {
                    System.out.println("[" + m.group(i) + "]");
                }
            }
        }
    }

    输出结果

    [this is a]

    [this]

    [is a]

    [is]

    [test about the]

    [test]

    [about the]

    [about]

    [regex group, the]

    [regex]

    [group, the]

    [group,]

    [method groupCount is]

    [method]

    [groupCount is]

    [groupCount]

     

    start() 和 end() 的使用

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Compares {@code find()}, {@code lookingAt()} and {@code matches()} on the
     * same inputs, reporting the matched text with its start and end offsets.
     */
    public class StartEnd
    {
        /**
         * Runs two patterns against three sample sentences. For each sentence it
         * lists every find() hit of both patterns, then reports lookingAt() and
         * matches() for each pattern when they succeed.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            String[] input = {
                "java has regular expressing in 1.4",
                "regular expressing now expressing in java",
                "java represses oracular expressions"
            };

            Pattern p1 = Pattern.compile("re\\w*");
            Pattern p2 = Pattern.compile("java.*");

            for (int i = 0; i < input.length; i++)
            {
                String sentence = input[i];
                System.out.println("input" + i + ":" + sentence);

                Matcher m1 = p1.matcher(sentence);
                Matcher m2 = p2.matcher(sentence);

                // find(): every occurrence anywhere in the input.
                for (boolean hit = m1.find(); hit; hit = m1.find())
                {
                    report("m1.find() [", m1);
                }
                for (boolean hit = m2.find(); hit; hit = m2.find())
                {
                    report("m2.find() [", m2);
                }

                // lookingAt(): the match must begin at the start of the input.
                if (m1.lookingAt())
                {
                    report("m1.lookingAt() [", m1);
                }
                if (m2.lookingAt())
                {
                    report("m2.lookingAt() [", m2);
                }

                // matches(): the match must cover the entire input.
                if (m1.matches())
                {
                    report("m1.matches() [", m1);
                }
                if (m2.matches())
                {
                    report("m2.matches() [", m2);
                }

                System.out.println();
            }
        }

        /** Prints the current match of {@code m} with its offsets, after {@code prefix}. */
        private static void report(String prefix, Matcher m)
        {
            System.out.println(prefix + m.group() + "] start = "
                    + m.start() + ",end = " + m.end());
        }
    }

     

    输出结果:

    input0:java has regular expressing in 1.4

    m1.find() [regular] start = 9,end = 16

    m1.find() [ressing] start = 20,end = 27

    m2.find() [java has regular expressing in 1.4] start = 0,end = 34

    m2.lookingAt() [java has regular expressing in 1.4] start = 0,end = 34

    m2.matches() [java has regular expressing in 1.4] start = 0,end = 34

     

    input1:regular expressing now expressing in java

    m1.find() [regular] start = 0,end = 7

    m1.find() [ressing] start = 11,end = 18

    m1.find() [ressing] start = 26,end = 33

    m2.find() [java] start = 37,end = 41

    m1.lookingAt() [regular] start = 0,end = 7

     

    input2:java represses oracular expressions

    m1.find() [represses] start = 5,end = 14

    m1.find() [ressions] start = 27,end = 35

    m2.find() [java represses oracular expressions] start = 0,end = 35

    m2.lookingAt() [java represses oracular expressions] start = 0,end = 35

    m2.matches() [java represses oracular expressions] start = 0,end = 35

     

     

    模式标记

    public static Pattern compile(String regex,int flags)

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Demonstrates combining pattern flags: a case-insensitive, multi-line
     * search for "java" at the beginning of each line.
     */
    public class Flag
    {
        /**
         * Prints every line-initial occurrence of "java" (in any letter case)
         * found in a four-line sample text.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            // MULTILINE makes '^' match after every line terminator, not only at
            // the start of the input; CASE_INSENSITIVE lets "JAVA" match too.
            int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;
            Pattern lineStart = Pattern.compile("^java", flags);

            String text = "java has regex \njava has regex \n"
                    + "JAVA has pretty good regular expressions\n"
                    + "Regular expressions are in java";

            Matcher hits = lineStart.matcher(text);
            for (boolean found = hits.find(); found; found = hits.find())
            {
                System.out.println(hits.group());
            }
        }
    }

    输出结果:

    java

    java

    JAVA

     

     

    split()方法的使用

    public String[] split(CharSequence input,int limit)

    public String[] split(CharSequence input)

    import java.util.Arrays;

    import java.util.regex.Pattern;

     

    /**
     * Demonstrates {@code Pattern.split} with and without a limit, alongside
     * the equivalent convenience method {@code String.split}.
     */
    public class SplitDemo
    {
        /**
         * Splits a sample string on "!!" (unlimited, then limited to three
         * pieces) and splits a second string on single spaces, printing each
         * result as a list.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            String input = "This!!unusual use!!of exclamation!!points";
            Pattern delimiter = Pattern.compile("!!");

            String[] allPieces = delimiter.split(input);
            System.out.println(Arrays.asList(allPieces));

            // With a limit of 3 the last element keeps the unsplit remainder.
            String[] firstThree = delimiter.split(input, 3);
            System.out.println(Arrays.asList(firstThree));

            String[] words = "Aha! String has a split() built in".split(" ");
            System.out.println(Arrays.asList(words));
        }
    }

    输出结果:

    [This, unusual use, of exclamation, points]

    [This, unusual use, of exclamation!!points]

    [Aha!, String, has, a, split(), built, in]

    替换操作

    import java.io.BufferedReader;

    import java.io.FileInputStream;

    import java.io.InputStreamReader;

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Demonstrates the regex replacement operations: {@code replaceAll},
     * {@code replaceFirst}, and the incremental
     * {@code appendReplacement}/{@code appendTail} pair.
     */
    public class ReplaceTest
    {
        /** Input used when no path is given: this class's own source file. */
        private static final String DEFAULT_SOURCE = "src/com/anllin/regex/ReplaceTest.java";

        /**
         * Reads a text file, optionally narrows it to the text between
         * {@code /*!} and {@code !*}{@code /} markers, normalizes its spacing,
         * prints it, then prints a copy with the first vowel replaced by
         * {@code (VOWEL1)} and every remaining lower-case vowel upper-cased.
         *
         * @param args optional; {@code args[0]} is the file to read. When absent,
         *             {@value #DEFAULT_SOURCE} is read, relative to the working
         *             directory.
         * @throws Exception if the file cannot be opened or read
         */
        public static void main(String[] args) throws Exception
        {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE;
            String s = readAll(path);

            // Match the specially-commented block of text, if the file has one;
            // otherwise the whole file content is processed.
            Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)
                    .matcher(s);
            if (mInput.find())
            {
                // Captured by parentheses
                s = mInput.group(1);
            }

            // Replace two or more spaces with a single space:
            s = s.replaceAll(" {2,}", " ");
            // Replace one or more spaces at the beginning of each line with no
            // spaces. Must enable MULTILINE mode so '^' matches at every line.
            s = s.replaceAll("(?m)^ +", "");
            System.out.println(s);

            s = s.replaceFirst("[aeiou]", "(VOWEL1)");
            StringBuffer sbuf = new StringBuffer();

            Matcher m = Pattern.compile("[aeiou]").matcher(s);
            // Process the find information as you perform the replacements:
            while (m.find())
            {
                m.appendReplacement(sbuf, m.group().toUpperCase());
            }
            // Put in the remainder of the text:
            m.appendTail(sbuf);
            System.out.println(sbuf);
        }

        /**
         * Reads the whole file as UTF-8 text. Each line is terminated with
         * {@code '\n'} because {@code readLine()} strips the terminator and the
         * later MULTILINE replacement needs real line boundaries.
         */
        private static String readAll(String path) throws java.io.IOException
        {
            BufferedReader reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(path), "UTF-8"));
            try
            {
                StringBuffer sb = new StringBuffer();
                String line;
                while (null != (line = reader.readLine()))
                {
                    sb.append(line).append('\n');
                }
                return sb.toString();
            }
            finally
            {
                // Always release the file handle, even if reading fails.
                reader.close();
            }
        }
    }

    输出结果

    package com.anllin.regex;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStreamReader;import java.util.regex.Matcher;import java.util.regex.Pattern;public class ReplaceTest{  public static void main(String[] args) throws Exception    {      BufferedReader reader = new BufferedReader(new InputStreamReader(              new FileInputStream("src/com/anllin/regex/ReplaceTest.java")));    String str = null;       StringBuffer sb = new StringBuffer();     while (null != (str = reader.readLine()))       {          sb.append(str);       }      String s = sb.toString();       // Match the specially-commented block of text above:      Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)               .matcher(s);      if (mInput.find())       {          // Captured by parentheses          s = mInput.group(1);     }      // Replace two or more spaces with a single space:       s = s.replaceAll(" {2,}", " ");       // Replace one or more spaces at the beginning of each line with no     // spaces.Must enable MULTILINE mode.     s = s.replaceAll("(?m)^+", "");     System.out.println(s);       s = s.replaceFirst("[aeiou]", "(VOWEL1)");    StringBuffer sbuf = new StringBuffer();      Pattern p = Pattern.compile("[aeiou]");       Matcher m = p.matcher(s);       // Process the find information as you perform the replacements:       while (m.find())     {          m.appendReplacement(sbuf, m.group().toUpperCase());     }      // Put in the remainder of ther text:     m.appendTail(sbuf);      System.out.println(sbuf);   }}

    --------------------------------------------------------------------

    p(VOWEL1)ckAgE cOm.AnllIn.rEgEx;ImpOrt jAvA.IO.BUffErEdREAdEr;ImpOrt jAvA.IO.FIlEInpUtStrEAm;ImpOrt jAvA.IO.InpUtStrEAmREAdEr;ImpOrt jAvA.UtIl.rEgEx.MAtchEr;ImpOrt jAvA.UtIl.rEgEx.PAttErn;pUblIc clAss REplAcETEst{  pUblIc stAtIc vOId mAIn(StrIng[] Args) thrOws ExcEptIOn    {      BUffErEdREAdEr rEAdEr = nEw BUffErEdREAdEr(nEw InpUtStrEAmREAdEr(              nEw FIlEInpUtStrEAm("src/cOm/AnllIn/rEgEx/REplAcETEst.jAvA")));    StrIng str = nUll;       StrIngBUffEr sb = nEw StrIngBUffEr();     whIlE (nUll != (str = rEAdEr.rEAdLInE()))       {          sb.AppEnd(str);       }      StrIng s = sb.tOStrIng();       // MAtch thE spEcIAlly-cOmmEntEd blOck Of tExt AbOvE:      MAtchEr mInpUt = PAttErn.cOmpIlE("/\\*!(.*)!\\*/", PAttErn.DOTALL)               .mAtchEr(s);      If (mInpUt.fInd())       {          // CAptUrEd by pArEnthEsEs          s = mInpUt.grOUp(1);     }      // REplAcE twO Or mOrE spAcEs wIth A sInglE spAcE:       s = s.rEplAcEAll(" {2,}", " ");       // REplAcE OnE Or mOrE spAcEs At thE bEgInnIng Of EAch lInE wIth nO     // spAcEs.MUst EnAblE MULTILINE mOdE.     s = s.rEplAcEAll("(?m)^+", "");     SystEm.OUt.prIntln(s);      s = s.rEplAcEFIrst("[AEIOU]", "(VOWEL1)");    StrIngBUffEr sbUf = nEw StrIngBUffEr();      PAttErn p = PAttErn.cOmpIlE("[AEIOU]");       MAtchEr m = p.mAtchEr(s);       // PrOcEss thE fInd InfOrmAtIOn As yOU pErfOrm thE rEplAcEmEnts:       whIlE (m.fInd())     {          m.AppEndREplAcEmEnt(sbUf, m.grOUp().tOUppErCAsE());     }      // PUt In thE rEmAIndEr Of thEr tExt:     m.AppEndTAIl(sbUf);      SystEm.OUt.prIntln(sbUf);   }}

     

     

    reset()方法的使用

    import java.util.regex.Matcher;

    import java.util.regex.Pattern;

     

    /**
     * Demonstrates {@code Matcher.reset(CharSequence)}: one matcher is reused
     * against a second input without compiling the pattern again.
     */
    public class ResetTest
    {
        /**
         * Prints all matches of a three-letter pattern in a first sentence,
         * resets the matcher onto a second sentence, and prints its matches.
         *
         * @param args ignored
         */
        public static void main(String[] args)
        {
            Pattern threeLetters = Pattern.compile("[frb][aiu][gx]");
            Matcher m = threeLetters.matcher("fix the rug with bags");
            printAll(m);

            // reset(CharSequence) swaps in new input and discards match state.
            m.reset("fix the rig with rags");
            printAll(m);
        }

        /** Prints each remaining match of {@code matcher} on its own line. */
        private static void printAll(Matcher matcher)
        {
            for (boolean found = matcher.find(); found; found = matcher.find())
            {
                System.out.println(matcher.group());
            }
        }
    }

    Output:

    fix

    rug

    bag

    fix

    rig

    rag

     

     

     

    Summary of regular-expression constructs

    Construct

    Matches

     

     

    Characters

    x

    The character x

    \\

    The backslash character

    \0n

    The character with octal value 0n (0 <= n <= 7)

    \0nn

    The character with octal value 0nn (0 <= n <= 7)

    \0mnn

    The character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)

    \xhh

    The character with hexadecimal value 0xhh

    \uhhhh

    The character with hexadecimal value 0xhhhh

    \t

    The tab character ('\u0009')

    \n

    The newline (line feed) character ('\u000A')

    \r

    The carriage-return character ('\u000D')

    \f

    The form-feed character ('\u000C')

    \a

    The alert (bell) character ('\u0007')

    \e

    The escape character ('\u001B')

    \cx

    The control character corresponding to x

     

     

    Character classes

    [abc]

    a, b, or c (simple class)

    [^abc]

    Any character except a, b, or c (negation)

    [a-zA-Z]

    a through z or A through Z, inclusive (range)

    [a-d[m-p]]

    a through d, or m through p: [a-dm-p] (union)

    [a-z&&[def]]

    d, e, or f (intersection)

    [a-z&&[^bc]]

    a through z, except for b and c: [ad-z] (subtraction)

    [a-z&&[^m-p]]

    a through z, and not m through p: [a-lq-z](subtraction)

     

     

    Predefined character classes

    .

    Any character (may or may not match line terminators)

    \d

    A digit: [0-9]

    \D

    A non-digit: [^0-9]

    \s

    A whitespace character: [ \t\n\x0B\f\r]

    \S

    A non-whitespace character: [^\s]

    \w

    A word character: [a-zA-Z_0-9]

    \W

    A non-word character: [^\w]

     

     

    POSIX character classes (US-ASCII only)

    \p{Lower}

    A lower-case alphabetic character: [a-z]

    \p{Upper}

    An upper-case alphabetic character:[A-Z]

    \p{ASCII}

    All ASCII:[\x00-\x7F]

    \p{Alpha}

    An alphabetic character:[\p{Lower}\p{Upper}]

    \p{Digit}

    A decimal digit: [0-9]

    \p{Alnum}

    An alphanumeric character:[\p{Alpha}\p{Digit}]

    \p{Punct}

    Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

    \p{Graph}

    A visible character: [\p{Alnum}\p{Punct}]

    \p{Print}

    A printable character: [\p{Graph}\x20]

    \p{Blank}

    A space or a tab: [ \t]

    \p{Cntrl}

    A control character: [\x00-\x1F\x7F]

    \p{XDigit}

    A hexadecimal digit: [0-9a-fA-F]

    \p{Space}

    A whitespace character: [ \t\n\x0B\f\r]

     

     

    java.lang.Character classes (simple java character type)

    \p{javaLowerCase}

    Equivalent to java.lang.Character.isLowerCase()

    \p{javaUpperCase}

    Equivalent to java.lang.Character.isUpperCase()

    \p{javaWhitespace}

    Equivalent to java.lang.Character.isWhitespace()

    \p{javaMirrored}

    Equivalent to java.lang.Character.isMirrored()

     

     

    Classes for Unicode blocks and categories

    \p{InGreek}

    A character in the Greek block (simple block)

    \p{Lu}

    An uppercase letter (simple category)

    \p{Sc}

    A currency symbol

    \P{InGreek}

    Any character except one in the Greek block (negation)

    [\p{L}&&[^\p{Lu}]] 

    Any letter except an uppercase letter (subtraction)

     

     

    Boundary matchers

    ^

    The beginning of a line

    $

    The end of a line

    \b

    A word boundary

    \B

    A non-word boundary

    \A

    The beginning of the input

    \G

    The end of the previous match

    \Z

    The end of the input but for the final terminator, if any

    \z

    The end of the input

     

     

    Greedy quantifiers

    X?

    X, once or not at all

    X*

    X, zero or more times

    X+

    X, one or more times

    X{n}

    X, exactly n times

    X{n,}

    X, at least n times

    X{n,m}

    X, at least n but not more than m times

     

     

    Reluctant quantifiers

    X??

    X, once or not at all

    X*?

    X, zero or more times

    X+?

    X, one or more times

    X{n}?

    X, exactly n times

    X{n,}?

    X, at least n times

    X{n,m}?

    X, at least n but not more than m times

     

     

    Possessive quantifiers

    X?+

    X, once or not at all

    X*+

    X, zero or more times

    X++

    X, one or more times

    X{n}+

    X, exactly n times

    X{n,}+

    X, at least n times

    X{n,m}+

    X, at least n but not more than m times

     

     

    Logical operators

    XY

    X followed by Y

    X|Y

    Either X or Y

    (X)

    X, as a capturing group

     

     

    Back references

    \n

    Whatever the nth capturing group matched

     

     

    Quotation

    \

    Nothing, but quotes the following character

    \Q

    Nothing, but quotes all characters until \E

    \E

    Nothing, but ends quoting started by \Q

     

     

    Special constructs (non-capturing)

    (?:X)

    X, as a non-capturing group

    (?idmsux-idmsux) 

    Nothing, but turns match flags i d m s u x on - off

    (?idmsux-idmsux:X)  

    X, as a non-capturing group with the given flags i d m s u x on - off

    (?=X)

    X, via zero-width positive lookahead

    (?!X)

    X, via zero-width negative lookahead

    (?<=X)

    X, via zero-width positive lookbehind

    (?<!X)

    X, via zero-width negative lookbehind

    (?>X)

    X, as an independent, non-capturing group

     

  • 相关阅读:
    ES6 数值
    ES6 字符串
    ES6 Reflect 与 Proxy
    ES6 Map 与 Set
    es6 Symbol
    新兴的API(fileReader、geolocation、web计时、web worker)
    浏览器数据库 IndexedDB 入门教程
    离线应用与客户端存储(cookie storage indexedDB)
    javascript高级技巧篇(作用域安全、防篡改、惰性载入、节流、自定义事件,拖放)
    ajax与comet
  • 原文地址:https://www.cnblogs.com/zfc2201/p/2143751.html
Copyright © 2011-2022 走看看