工具类
Patterns.java
1 package com.util; 2 3 import java.util.regex.Matcher; 4 import java.util.regex.Pattern; 5 6 /** 7 * Commonly used regular expression patterns. 8 */ 9 public class Patterns { 10 /** 11 * Regular expression to match all IANA top-level domains. 12 * List accurate as of 2011/07/18. List taken from: 13 * http://data.iana.org/TLD/tlds-alpha-by-domain.txt 14 * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py 15 * 16 * @deprecated Due to the recent profileration of gTLDs, this API is 17 * expected to become out-of-date very quickly. Therefore it is now 18 * deprecated. 19 */ 20 @Deprecated 21 public static final String TOP_LEVEL_DOMAIN_STR = 22 "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])" 23 + "|(biz|b[abdefghijmnorstvwyz])" 24 + "|(cat|com|coop|c[acdfghiklmnoruvxyz])" 25 + "|d[ejkmoz]" 26 + "|(edu|e[cegrstu])" 27 + "|f[ijkmor]" 28 + "|(gov|g[abdefghilmnpqrstuwy])" 29 + "|h[kmnrtu]" 30 + "|(info|int|i[delmnoqrst])" 31 + "|(jobs|j[emop])" 32 + "|k[eghimnprwyz]" 33 + "|l[abcikrstuvy]" 34 + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])" 35 + "|(name|net|n[acefgilopruz])" 36 + "|(org|om)" 37 + "|(pro|p[aefghklmnrstwy])" 38 + "|qa" 39 + "|r[eosuw]" 40 + "|s[abcdeghijklmnortuvyz]" 41 + "|(tel|travel|t[cdfghjklmnoprtvwz])" 42 + "|u[agksyz]" 43 + "|v[aceginu]" 44 + "|w[fs]" 45 + 
"|(u03b4u03bfu03bau03b9u03bcu03ae|u0438u0441u043fu044bu0442u0430u043du0438u0435|u0440u0444|u0441u0440u0431|u05d8u05e2u05e1u05d8|u0622u0632u0645u0627u06ccu0634u06cc|u0625u062eu062au0628u0627u0631|u0627u0644u0627u0631u062fu0646|u0627u0644u062cu0632u0627u0626u0631|u0627u0644u0633u0639u0648u062fu064au0629|u0627u0644u0645u063au0631u0628|u0627u0645u0627u0631u0627u062a|u0628u06beu0627u0631u062a|u062au0648u0646u0633|u0633u0648u0631u064au0629|u0641u0644u0633u0637u064au0646|u0642u0637u0631|u0645u0635u0631|u092au0930u0940u0915u094du0937u093e|u092du093eu0930u0924|u09adu09beu09b0u09a4|u0a2du0a3eu0a30u0a24|u0aadu0abeu0ab0u0aa4|u0b87u0ba8u0bcdu0ba4u0bbfu0bafu0bbe|u0b87u0bb2u0b99u0bcdu0b95u0bc8|u0b9au0bbfu0b99u0bcdu0b95u0baau0bcdu0baau0bc2u0bb0u0bcd|u0baau0bb0u0bbfu0b9fu0bcdu0b9au0bc8|u0c2du0c3eu0c30u0c24u0c4d|u0dbdu0d82u0d9au0dcf|u0e44u0e17u0e22|u30c6u30b9u30c8|u4e2du56fd|u4e2du570b|u53f0u6e7e|u53f0u7063|u65b0u52a0u5761|u6d4bu8bd5|u6e2cu8a66|u9999u6e2f|ud14cuc2a4ud2b8|ud55cuad6d|xn\-\-0zwm56d|xn\-\-11b5bs3a9aj6g|xn\-\-3e0b707e|xn\-\-45brj9c|xn\-\-80akhbyknj4f|xn\-\-90a3ac|xn\-\-9t4b11yi5a|xn\-\-clchc0ea0b2g2a9gcd|xn\-\-deba0ad|xn\-\-fiqs8s|xn\-\-fiqz9s|xn\-\-fpcrj9c3d|xn\-\-fzc2c9e2c|xn\-\-g6w251d|xn\-\-gecrj9c|xn\-\-h2brj9c|xn\-\-hgbk6aj7f53bba|xn\-\-hlcj6aya9esc7a|xn\-\-j6w193g|xn\-\-jxalpdlp|xn\-\-kgbechtv|xn\-\-kprw13d|xn\-\-kpry57d|xn\-\-lgbbat1ad8j|xn\-\-mgbaam7a8h|xn\-\-mgbayh7gpa|xn\-\-mgbbh1a71e|xn\-\-mgbc0a9azcg|xn\-\-mgberp4a5d4ar|xn\-\-o3cw4h|xn\-\-ogbpf8fl|xn\-\-p1ai|xn\-\-pgbs0dh|xn\-\-s9brj9c|xn\-\-wgbh1c|xn\-\-wgbl6a|xn\-\-xkc2al3hye2a|xn\-\-xkc2dl3a5ee0h|xn\-\-yfro4i67o|xn\-\-ygbi2ammx|xn\-\-zckzah|xxx)" 46 + "|y[et]" 47 + "|z[amw])"; 48 49 /** 50 * Regular expression pattern to match all IANA top-level domains. 51 * @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}. 
52 */ 53 @Deprecated 54 public static final Pattern TOP_LEVEL_DOMAIN = 55 Pattern.compile(TOP_LEVEL_DOMAIN_STR); 56 57 /** 58 * Regular expression to match all IANA top-level domains for WEB_URL. 59 * List accurate as of 2011/07/18. List taken from: 60 * http://data.iana.org/TLD/tlds-alpha-by-domain.txt 61 * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py 62 * 63 * @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}. 64 */ 65 @Deprecated 66 public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL = 67 "(?:" 68 + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])" 69 + "|(?:biz|b[abdefghijmnorstvwyz])" 70 + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])" 71 + "|d[ejkmoz]" 72 + "|(?:edu|e[cegrstu])" 73 + "|f[ijkmor]" 74 + "|(?:gov|g[abdefghilmnpqrstuwy])" 75 + "|h[kmnrtu]" 76 + "|(?:info|int|i[delmnoqrst])" 77 + "|(?:jobs|j[emop])" 78 + "|k[eghimnprwyz]" 79 + "|l[abcikrstuvy]" 80 + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])" 81 + "|(?:name|net|n[acefgilopruz])" 82 + "|(?:org|om)" 83 + "|(?:pro|p[aefghklmnrstwy])" 84 + "|qa" 85 + "|r[eosuw]" 86 + "|s[abcdeghijklmnortuvyz]" 87 + "|(?:tel|travel|t[cdfghjklmnoprtvwz])" 88 + "|u[agksyz]" 89 + "|v[aceginu]" 90 + "|w[fs]" 91 + 
"|(?:u03b4u03bfu03bau03b9u03bcu03ae|u0438u0441u043fu044bu0442u0430u043du0438u0435|u0440u0444|u0441u0440u0431|u05d8u05e2u05e1u05d8|u0622u0632u0645u0627u06ccu0634u06cc|u0625u062eu062au0628u0627u0631|u0627u0644u0627u0631u062fu0646|u0627u0644u062cu0632u0627u0626u0631|u0627u0644u0633u0639u0648u062fu064au0629|u0627u0644u0645u063au0631u0628|u0627u0645u0627u0631u0627u062a|u0628u06beu0627u0631u062a|u062au0648u0646u0633|u0633u0648u0631u064au0629|u0641u0644u0633u0637u064au0646|u0642u0637u0631|u0645u0635u0631|u092au0930u0940u0915u094du0937u093e|u092du093eu0930u0924|u09adu09beu09b0u09a4|u0a2du0a3eu0a30u0a24|u0aadu0abeu0ab0u0aa4|u0b87u0ba8u0bcdu0ba4u0bbfu0bafu0bbe|u0b87u0bb2u0b99u0bcdu0b95u0bc8|u0b9au0bbfu0b99u0bcdu0b95u0baau0bcdu0baau0bc2u0bb0u0bcd|u0baau0bb0u0bbfu0b9fu0bcdu0b9au0bc8|u0c2du0c3eu0c30u0c24u0c4d|u0dbdu0d82u0d9au0dcf|u0e44u0e17u0e22|u30c6u30b9u30c8|u4e2du56fd|u4e2du570b|u53f0u6e7e|u53f0u7063|u65b0u52a0u5761|u6d4bu8bd5|u6e2cu8a66|u9999u6e2f|ud14cuc2a4ud2b8|ud55cuad6d|xn\-\-0zwm56d|xn\-\-11b5bs3a9aj6g|xn\-\-3e0b707e|xn\-\-45brj9c|xn\-\-80akhbyknj4f|xn\-\-90a3ac|xn\-\-9t4b11yi5a|xn\-\-clchc0ea0b2g2a9gcd|xn\-\-deba0ad|xn\-\-fiqs8s|xn\-\-fiqz9s|xn\-\-fpcrj9c3d|xn\-\-fzc2c9e2c|xn\-\-g6w251d|xn\-\-gecrj9c|xn\-\-h2brj9c|xn\-\-hgbk6aj7f53bba|xn\-\-hlcj6aya9esc7a|xn\-\-j6w193g|xn\-\-jxalpdlp|xn\-\-kgbechtv|xn\-\-kprw13d|xn\-\-kpry57d|xn\-\-lgbbat1ad8j|xn\-\-mgbaam7a8h|xn\-\-mgbayh7gpa|xn\-\-mgbbh1a71e|xn\-\-mgbc0a9azcg|xn\-\-mgberp4a5d4ar|xn\-\-o3cw4h|xn\-\-ogbpf8fl|xn\-\-p1ai|xn\-\-pgbs0dh|xn\-\-s9brj9c|xn\-\-wgbh1c|xn\-\-wgbl6a|xn\-\-xkc2al3hye2a|xn\-\-xkc2dl3a5ee0h|xn\-\-yfro4i67o|xn\-\-ygbi2ammx|xn\-\-zckzah|xxx)" 92 + "|y[et]" 93 + "|z[amw]))"; 94 95 /** 96 * Regular expression to match all IANA top-level domains. 97 * 98 * List accurate as of 2015/11/24. 
List taken from: 99 * http://data.iana.org/TLD/tlds-alpha-by-domain.txt 100 * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py 101 * 102 * @hide 103 */ 104 static final String IANA_TOP_LEVEL_DOMAINS = 105 "(?:" 106 + "(?:aaa|aarp|abb|abbott|abogado|academy|accenture|accountant|accountants|aco|active" 107 + "|actor|ads|adult|aeg|aero|afl|agency|aig|airforce|airtel|allfinanz|alsace|amica|amsterdam" 108 + "|android|apartments|app|apple|aquarelle|aramco|archi|army|arpa|arte|asia|associates" 109 + "|attorney|auction|audio|auto|autos|axa|azure|a[cdefgilmoqrstuwxz])" 110 + "|(?:band|bank|bar|barcelona|barclaycard|barclays|bargains|bauhaus|bayern|bbc|bbva" 111 + "|bcn|beats|beer|bentley|berlin|best|bet|bharti|bible|bid|bike|bing|bingo|bio|biz|black" 112 + "|blackfriday|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|bom|bond|boo|boots|boutique" 113 + "|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|build|builders|business" 114 + "|buzz|bzh|b[abdefghijmnorstvwyz])" 115 + "|(?:cab|cafe|cal|camera|camp|cancerresearch|canon|capetown|capital|car|caravan|cards" 116 + "|care|career|careers|cars|cartier|casa|cash|casino|cat|catering|cba|cbn|ceb|center|ceo" 117 + "|cern|cfa|cfd|chanel|channel|chat|cheap|chloe|christmas|chrome|church|cipriani|cisco" 118 + "|citic|city|cityeats|claims|cleaning|click|clinic|clothing|cloud|club|clubmed|coach" 119 + "|codes|coffee|college|cologne|com|commbank|community|company|computer|comsec|condos" 120 + "|construction|consulting|contractors|cooking|cool|coop|corsica|country|coupons|courses" 121 + "|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|cuisinella|cymru|cyou|c[acdfghiklmnoruvwxyz])" 122 + "|(?:dabur|dad|dance|date|dating|datsun|day|dclk|deals|degree|delivery|dell|delta" 123 + "|democrat|dental|dentist|desi|design|dev|diamonds|diet|digital|direct|directory|discount" 124 + "|dnp|docs|dog|doha|domains|doosan|download|drive|durban|dvag|d[ejkmoz])" 125 + 
"|(?:earth|eat|edu|education|email|emerck|energy|engineer|engineering|enterprises" 126 + "|epson|equipment|erni|esq|estate|eurovision|eus|events|everbank|exchange|expert|exposed" 127 + "|express|e[cegrstu])" 128 + "|(?:fage|fail|fairwinds|faith|family|fan|fans|farm|fashion|feedback|ferrero|film" 129 + "|final|finance|financial|firmdale|fish|fishing|fit|fitness|flights|florist|flowers|flsmidth" 130 + "|fly|foo|football|forex|forsale|forum|foundation|frl|frogans|fund|furniture|futbol|fyi" 131 + "|f[ijkmor])" 132 + "|(?:gal|gallery|game|garden|gbiz|gdn|gea|gent|genting|ggee|gift|gifts|gives|giving" 133 + "|glass|gle|global|globo|gmail|gmo|gmx|gold|goldpoint|golf|goo|goog|google|gop|gov|grainger" 134 + "|graphics|gratis|green|gripe|group|gucci|guge|guide|guitars|guru|g[abdefghilmnpqrstuwy])" 135 + "|(?:hamburg|hangout|haus|healthcare|help|here|hermes|hiphop|hitachi|hiv|hockey|holdings" 136 + "|holiday|homedepot|homes|honda|horse|host|hosting|hoteles|hotmail|house|how|hsbc|hyundai" 137 + "|h[kmnrtu])" 138 + "|(?:ibm|icbc|ice|icu|ifm|iinet|immo|immobilien|industries|infiniti|info|ing|ink|institute" 139 + "|insure|int|international|investments|ipiranga|irish|ist|istanbul|itau|iwc|i[delmnoqrst])" 140 + "|(?:jaguar|java|jcb|jetzt|jewelry|jlc|jll|jobs|joburg|jprs|juegos|j[emop])" 141 + "|(?:kaufen|kddi|kia|kim|kinder|kitchen|kiwi|koeln|komatsu|krd|kred|kyoto|k[eghimnprwyz])" 142 + "|(?:lacaixa|lancaster|land|landrover|lasalle|lat|latrobe|law|lawyer|lds|lease|leclerc" 143 + "|legal|lexus|lgbt|liaison|lidl|life|lifestyle|lighting|limited|limo|linde|link|live" 144 + "|lixil|loan|loans|lol|london|lotte|lotto|love|ltd|ltda|lupin|luxe|luxury|l[abcikrstuvy])" 145 + "|(?:madrid|maif|maison|man|management|mango|market|marketing|markets|marriott|mba" 146 + "|media|meet|melbourne|meme|memorial|men|menu|meo|miami|microsoft|mil|mini|mma|mobi|moda" 147 + "|moe|moi|mom|monash|money|montblanc|mormon|mortgage|moscow|motorcycles|mov|movie|movistar" 148 + 
"|mtn|mtpc|mtr|museum|mutuelle|m[acdeghklmnopqrstuvwxyz])" 149 + "|(?:nadex|nagoya|name|navy|nec|net|netbank|network|neustar|new|news|nexus|ngo|nhk" 150 + "|nico|ninja|nissan|nokia|nra|nrw|ntt|nyc|n[acefgilopruz])" 151 + "|(?:obi|office|okinawa|omega|one|ong|onl|online|ooo|oracle|orange|org|organic|osaka" 152 + "|otsuka|ovh|om)" 153 + "|(?:page|panerai|paris|partners|parts|party|pet|pharmacy|philips|photo|photography" 154 + "|photos|physio|piaget|pics|pictet|pictures|ping|pink|pizza|place|play|playstation|plumbing" 155 + "|plus|pohl|poker|porn|post|praxi|press|pro|prod|productions|prof|properties|property" 156 + "|protection|pub|p[aefghklmnrstwy])" 157 + "|(?:qpon|quebec|qa)" 158 + "|(?:racing|realtor|realty|recipes|red|redstone|rehab|reise|reisen|reit|ren|rent|rentals" 159 + "|repair|report|republican|rest|restaurant|review|reviews|rich|ricoh|rio|rip|rocher|rocks" 160 + "|rodeo|rsvp|ruhr|run|rwe|ryukyu|r[eosuw])" 161 + "|(?:saarland|sakura|sale|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|saxo" 162 + "|sbs|sca|scb|schmidt|scholarships|school|schule|schwarz|science|scor|scot|seat|security" 163 + "|seek|sener|services|seven|sew|sex|sexy|shiksha|shoes|show|shriram|singles|site|ski" 164 + "|sky|skype|sncf|soccer|social|software|sohu|solar|solutions|sony|soy|space|spiegel|spreadbetting" 165 + "|srl|stada|starhub|statoil|stc|stcgroup|stockholm|studio|study|style|sucks|supplies" 166 + "|supply|support|surf|surgery|suzuki|swatch|swiss|sydney|systems|s[abcdeghijklmnortuvxyz])" 167 + "|(?:tab|taipei|tatamotors|tatar|tattoo|tax|taxi|team|tech|technology|tel|telefonica" 168 + "|temasek|tennis|thd|theater|theatre|tickets|tienda|tips|tires|tirol|today|tokyo|tools" 169 + "|top|toray|toshiba|tours|town|toyota|toys|trade|trading|training|travel|trust|tui|t[cdfghjklmnortvwz])" 170 + "|(?:ubs|university|uno|uol|u[agksyz])" 171 + "|(?:vacations|vana|vegas|ventures|versicherung|vet|viajes|video|villas|vin|virgin" 172 + 
"|vision|vista|vistaprint|viva|vlaanderen|vodka|vote|voting|voto|voyage|v[aceginu])" 173 + "|(?:wales|walter|wang|watch|webcam|website|wed|wedding|weir|whoswho|wien|wiki|williamhill" 174 + "|win|windows|wine|wme|work|works|world|wtc|wtf|w[fs])" 175 + "|(?:u03b5u03bb|u0431u0435u043b|u0434u0435u0442u0438|u043au043eu043c|u043cu043au0434" 176 + "|u043cu043eu043d|u043cu043eu0441u043au0432u0430|u043eu043du043bu0430u0439u043d" 177 + "|u043eu0440u0433|u0440u0443u0441|u0440u0444|u0441u0430u0439u0442|u0441u0440u0431" 178 + "|u0443u043au0440|u049bu0430u0437|u0570u0561u0575|u05e7u05d5u05dd|u0627u0631u0627u0645u0643u0648" 179 + "|u0627u0644u0627u0631u062fu0646|u0627u0644u062cu0632u0627u0626u0631|u0627u0644u0633u0639u0648u062fu064au0629" 180 + "|u0627u0644u0645u063au0631u0628|u0627u0645u0627u0631u0627u062a|u0627u06ccu0631u0627u0646" 181 + "|u0628u0627u0632u0627u0631|u0628u06beu0627u0631u062a|u062au0648u0646u0633" 182 + "|u0633u0648u062fu0627u0646|u0633u0648u0631u064au0629|u0634u0628u0643u0629" 183 + "|u0639u0631u0627u0642|u0639u0645u0627u0646|u0641u0644u0633u0637u064au0646" 184 + "|u0642u0637u0631|u0643u0648u0645|u0645u0635u0631|u0645u0644u064au0633u064au0627" 185 + "|u0645u0648u0642u0639|u0915u0949u092e|u0928u0947u091f|u092du093eu0930u0924" 186 + "|u0938u0902u0917u0920u0928|u09adu09beu09b0u09a4|u0a2du0a3eu0a30u0a24|u0aadu0abeu0ab0u0aa4" 187 + "|u0b87u0ba8u0bcdu0ba4u0bbfu0bafu0bbe|u0b87u0bb2u0b99u0bcdu0b95u0bc8|u0b9au0bbfu0b99u0bcdu0b95u0baau0bcdu0baau0bc2u0bb0u0bcd" 188 + "|u0c2du0c3eu0c30u0c24u0c4d|u0dbdu0d82u0d9au0dcf|u0e04u0e2du0e21|u0e44u0e17u0e22" 189 + "|u10d2u10d4|u307fu3093u306a|u30b0u30fcu30b0u30eb|u30b3u30e0|u4e16u754c" 190 + "|u4e2du4fe1|u4e2du56fd|u4e2du570b|u4e2du6587u7f51|u4f01u4e1a|u4f5bu5c71" 191 + "|u4fe1u606f|u5065u5eb7|u516bu5366|u516cu53f8|u516cu76ca|u53f0u6e7e|u53f0u7063" 192 + "|u5546u57ce|u5546u5e97|u5546u6807|u5728u7ebf|u5927u62ff|u5a31u4e50|u5de5u884c" 193 + "|u5e7fu4e1c|u6148u5584|u6211u7231u4f60|u624bu673a|u653fu52a1|u653fu5e9c" 194 + 
"|u65b0u52a0u5761|u65b0u95fb|u65f6u5c1a|u673au6784|u6de1u9a6cu9521|u6e38u620f" 195 + "|u70b9u770b|u79fbu52a8|u7ec4u7ec7u673au6784|u7f51u5740|u7f51u5e97|u7f51u7edc" 196 + "|u8c37u6b4c|u96c6u56e2|u98deu5229u6d66|u9910u5385|u9999u6e2f|ub2f7ub137" 197 + "|ub2f7ucef4|uc0bcuc131|ud55cuad6d|xbox" 198 + "|xerox|xin|xn\-\-11b4c3d|xn\-\-1qqw23a|xn\-\-30rr7y|xn\-\-3bst00m|xn\-\-3ds443g" 199 + "|xn\-\-3e0b707e|xn\-\-3pxu8k|xn\-\-42c2d9a|xn\-\-45brj9c|xn\-\-45q11c|xn\-\-4gbrim" 200 + "|xn\-\-55qw42g|xn\-\-55qx5d|xn\-\-6frz82g|xn\-\-6qq986b3xl|xn\-\-80adxhks" 201 + "|xn\-\-80ao21a|xn\-\-80asehdb|xn\-\-80aswg|xn\-\-90a3ac|xn\-\-90ais|xn\-\-9dbq2a" 202 + "|xn\-\-9et52u|xn\-\-b4w605ferd|xn\-\-c1avg|xn\-\-c2br7g|xn\-\-cg4bki|xn\-\-clchc0ea0b2g2a9gcd" 203 + "|xn\-\-czr694b|xn\-\-czrs0t|xn\-\-czru2d|xn\-\-d1acj3b|xn\-\-d1alf|xn\-\-efvy88h" 204 + "|xn\-\-estv75g|xn\-\-fhbei|xn\-\-fiq228c5hs|xn\-\-fiq64b|xn\-\-fiqs8s|xn\-\-fiqz9s" 205 + "|xn\-\-fjq720a|xn\-\-flw351e|xn\-\-fpcrj9c3d|xn\-\-fzc2c9e2c|xn\-\-gecrj9c" 206 + "|xn\-\-h2brj9c|xn\-\-hxt814e|xn\-\-i1b6b1a6a2e|xn\-\-imr513n|xn\-\-io0a7i" 207 + "|xn\-\-j1aef|xn\-\-j1amh|xn\-\-j6w193g|xn\-\-kcrx77d1x4a|xn\-\-kprw13d|xn\-\-kpry57d" 208 + "|xn\-\-kput3i|xn\-\-l1acc|xn\-\-lgbbat1ad8j|xn\-\-mgb9awbf|xn\-\-mgba3a3ejt" 209 + "|xn\-\-mgba3a4f16a|xn\-\-mgbaam7a8h|xn\-\-mgbab2bd|xn\-\-mgbayh7gpa|xn\-\-mgbbh1a71e" 210 + "|xn\-\-mgbc0a9azcg|xn\-\-mgberp4a5d4ar|xn\-\-mgbpl2fh|xn\-\-mgbtx2b|xn\-\-mgbx4cd0ab" 211 + "|xn\-\-mk1bu44c|xn\-\-mxtq1m|xn\-\-ngbc5azd|xn\-\-node|xn\-\-nqv7f|xn\-\-nqv7fs00ema" 212 + "|xn\-\-nyqy26a|xn\-\-o3cw4h|xn\-\-ogbpf8fl|xn\-\-p1acf|xn\-\-p1ai|xn\-\-pgbs0dh" 213 + "|xn\-\-pssy2u|xn\-\-q9jyb4c|xn\-\-qcka1pmc|xn\-\-qxam|xn\-\-rhqv96g|xn\-\-s9brj9c" 214 + "|xn\-\-ses554g|xn\-\-t60b56a|xn\-\-tckwe|xn\-\-unup4y|xn\-\-vermgensberater\-ctb" 215 + "|xn\-\-vermgensberatung\-pwb|xn\-\-vhquv|xn\-\-vuq861b|xn\-\-wgbh1c|xn\-\-wgbl6a" 216 + "|xn\-\-xhq521b|xn\-\-xkc2al3hye2a|xn\-\-xkc2dl3a5ee0h|xn\-\-y9a3aq|xn\-\-yfro4i67o" 217 + 
"|xn\-\-ygbi2ammx|xn\-\-zfr164b|xperia|xxx|xyz)" 218 + "|(?:yachts|yamaxun|yandex|yodobashi|yoga|yokohama|youtube|y[et])" 219 + "|(?:zara|zip|zone|zuerich|z[amw]))"; 220 221 /** 222 * Kept for backward compatibility reasons. 223 * 224 * @deprecated Deprecated since it does not include all IRI characters defined in RFC 3987 225 */ 226 @Deprecated 227 public static final String GOOD_IRI_CHAR = 228 "a-zA-Z0-9u00A0-uD7FFuF900-uFDCFuFDF0-uFFEF"; 229 230 public static final Pattern IP_ADDRESS 231 = Pattern.compile( 232 "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\.(25[0-5]|2[0-4]" 233 + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]" 234 + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" 235 + "|[1-9][0-9]|[0-9]))"); 236 237 /** 238 * Valid UCS characters defined in RFC 3987. Excludes space characters. 239 */ 240 private static final String UCS_CHAR = "[" + 241 "u00A0-uD7FF" + 242 "uF900-uFDCF" + 243 "uFDF0-uFFEF" + 244 "uD800uDC00-uD83FuDFFD" + 245 "uD840uDC00-uD87FuDFFD" + 246 "uD880uDC00-uD8BFuDFFD" + 247 "uD8C0uDC00-uD8FFuDFFD" + 248 "uD900uDC00-uD93FuDFFD" + 249 "uD940uDC00-uD97FuDFFD" + 250 "uD980uDC00-uD9BFuDFFD" + 251 "uD9C0uDC00-uD9FFuDFFD" + 252 "uDA00uDC00-uDA3FuDFFD" + 253 "uDA40uDC00-uDA7FuDFFD" + 254 "uDA80uDC00-uDABFuDFFD" + 255 "uDAC0uDC00-uDAFFuDFFD" + 256 "uDB00uDC00-uDB3FuDFFD" + 257 "uDB44uDC00-uDB7FuDFFD" + 258 "&&[^u00A0[u2000-u200A]u2028u2029u202Fu3000]]"; 259 260 /** 261 * Valid characters for IRI label defined in RFC 3987. 262 */ 263 private static final String LABEL_CHAR = "a-zA-Z0-9" + UCS_CHAR; 264 265 /** 266 * Valid characters for IRI TLD defined in RFC 3987. 267 */ 268 private static final String TLD_CHAR = "a-zA-Z" + UCS_CHAR; 269 270 /** 271 * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets. 
272 */ 273 private static final String IRI_LABEL = 274 "[" + LABEL_CHAR + "](?:[" + LABEL_CHAR + "\-]{0,61}[" + LABEL_CHAR + "]){0,1}"; 275 276 /** 277 * RFC 3492 references RFC 1034 and limits Punycode algorithm output to 63 characters. 278 */ 279 private static final String PUNYCODE_TLD = "xn\-\-[\w\-]{0,58}\w"; 280 281 private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")"; 282 283 private static final String HOST_NAME = "(" + IRI_LABEL + "\.)+" + TLD; 284 285 public static final Pattern DOMAIN_NAME 286 = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")"); 287 288 private static final String PROTOCOL = "(?i:http|https|rtsp):\/\/"; 289 290 /* A word boundary or end of input. This is to stop foo.sure from matching as foo.su */ 291 private static final String WORD_BOUNDARY = "(?:\b|$|^)"; 292 293 private static final String USER_INFO = "(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)" 294 + "\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,64}(?:\:(?:[a-zA-Z0-9\$\-\_" 295 + "\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,25})?\@"; 296 297 private static final String PORT_NUMBER = "\:\d{1,5}"; 298 299 private static final String PATH_AND_QUERY = "\/(?:(?:[" + LABEL_CHAR 300 + "\;\/\?\:\@\&\=\#\~" // plus optional query params 301 + "\-\.\+\!\*\'\(\)\,\_])|(?:\%[a-fA-F0-9]{2}))*"; 302 303 /** 304 * Regular expression pattern to match most part of RFC 3987 305 * Internationalized URLs, aka IRIs. 306 */ 307 public static final Pattern WEB_URL = Pattern.compile("(" 308 + "(" 309 + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")?" 310 + "(?:" + DOMAIN_NAME + ")" 311 + "(?:" + PORT_NUMBER + ")?" 312 + ")" 313 + "(" + PATH_AND_QUERY + ")?" 
314 + WORD_BOUNDARY 315 + ")"); 316 317 /** 318 * Regular expression that matches known TLDs and punycode TLDs 319 */ 320 private static final String STRICT_TLD = "(?:" + 321 IANA_TOP_LEVEL_DOMAINS + "|" + PUNYCODE_TLD + ")"; 322 323 /** 324 * Regular expression that matches host names using {@link #STRICT_TLD} 325 */ 326 private static final String STRICT_HOST_NAME = "(?:(?:" + IRI_LABEL + "\.)+" 327 + STRICT_TLD + ")"; 328 329 /** 330 * Regular expression that matches domain names using either {@link #STRICT_HOST_NAME} or 331 * {@link #IP_ADDRESS} 332 */ 333 private static final Pattern STRICT_DOMAIN_NAME 334 = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")"); 335 336 /** 337 * Regular expression that matches domain names without a TLD 338 */ 339 private static final String RELAXED_DOMAIN_NAME = 340 "(?:" + "(?:" + IRI_LABEL + "(?:\.(?=\S))" +"?)+" + "|" + IP_ADDRESS + ")"; 341 342 /** 343 * Regular expression to match strings that do not start with a supported protocol. The TLDs 344 * are expected to be one of the known TLDs. 345 */ 346 private static final String WEB_URL_WITHOUT_PROTOCOL = "(" 347 + WORD_BOUNDARY 348 + "(?<!:\/\/)" 349 + "(" 350 + "(?:" + STRICT_DOMAIN_NAME + ")" 351 + "(?:" + PORT_NUMBER + ")?" 352 + ")" 353 + "(?:" + PATH_AND_QUERY + ")?" 354 + WORD_BOUNDARY 355 + ")"; 356 357 /** 358 * Regular expression to match strings that start with a supported protocol. Rules for domain 359 * names and TLDs are more relaxed. TLDs are optional. 360 */ 361 private static final String WEB_URL_WITH_PROTOCOL = "(" 362 + WORD_BOUNDARY 363 + "(?:" 364 + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")" 365 + "(?:" + RELAXED_DOMAIN_NAME + ")?" 366 + "(?:" + PORT_NUMBER + ")?" 367 + ")" 368 + "(?:" + PATH_AND_QUERY + ")?" 369 + WORD_BOUNDARY 370 + ")"; 371 372 /** 373 * Regular expression pattern to match IRIs. If a string starts with http(s):// the expression 374 * tries to match the URL structure with a relaxed rule for TLDs. 
If the string does not start 375 * with http(s):// the TLDs are expected to be one of the known TLDs. 376 * 377 * @hide 378 */ 379 public static final Pattern AUTOLINK_WEB_URL = Pattern.compile( 380 "(" + WEB_URL_WITH_PROTOCOL + "|" + WEB_URL_WITHOUT_PROTOCOL + ")"); 381 382 /** 383 * Regular expression for valid email characters. Does not include some of the valid characters 384 * defined in RFC5321: #&~!^`{}/=$*?| 385 */ 386 private static final String EMAIL_CHAR = LABEL_CHAR + "\+\-_%'"; 387 388 /** 389 * Regular expression for local part of an email address. RFC5321 section 4.5.3.1.1 limits 390 * the local part to be at most 64 octets. 391 */ 392 private static final String EMAIL_ADDRESS_LOCAL_PART = 393 "[" + EMAIL_CHAR + "]" + "(?:[" + EMAIL_CHAR + "\.]{1,62}[" + EMAIL_CHAR + "])?"; 394 395 /** 396 * Regular expression for the domain part of an email address. RFC5321 section 4.5.3.1.2 limits 397 * the domain to be at most 255 octets. 398 */ 399 private static final String EMAIL_ADDRESS_DOMAIN = 400 "(?=.{1,255}(?:\s|$|^))" + HOST_NAME; 401 402 /** 403 * Regular expression pattern to match email addresses. It excludes double quoted local parts 404 * and the special characters #&~!^`{}/=$*?| that are included in RFC5321. 405 * @hide 406 */ 407 public static final Pattern AUTOLINK_EMAIL_ADDRESS = Pattern.compile("(" + WORD_BOUNDARY + 408 "(?:" + EMAIL_ADDRESS_LOCAL_PART + "@" + EMAIL_ADDRESS_DOMAIN + ")" + 409 WORD_BOUNDARY + ")" 410 ); 411 412 public static final Pattern EMAIL_ADDRESS 413 = Pattern.compile( 414 "[a-zA-Z0-9\+\.\_\%\-\+]{1,256}" + 415 "\@" + 416 "[a-zA-Z0-9][a-zA-Z0-9\-]{0,64}" + 417 "(" + 418 "\." + 419 "[a-zA-Z0-9][a-zA-Z0-9\-]{0,25}" + 420 ")+" 421 ); 422 423 /** 424 * This pattern is intended for searching for things that look like they 425 * might be phone numbers in arbitrary text, not for validating whether 426 * something is in fact a phone number. It will miss many things that 427 * are legitimate phone numbers. 
428 * 429 * <p> The pattern matches the following: 430 * <ul> 431 * <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes 432 * may follow. 433 * <li>Optionally, sets of digits in parentheses, separated by spaces, dots, or dashes. 434 * <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes. 435 * </ul> 436 */ 437 public static final Pattern PHONE 438 = Pattern.compile( // sdd = space, dot, or dash 439 "(\+[0-9]+[\- \.]*)?" // +<digits><sdd>* 440 + "(\([0-9]+\)[\- \.]*)?" // (<digits>)<sdd>* 441 + "([0-9][0-9\- \.]+[0-9])"); // <digit><digit|sdd>+<digit> 442 443 /** 444 * Convenience method to take all of the non-null matching groups in a 445 * regex Matcher and return them as a concatenated string. 446 * 447 * @param matcher The Matcher object from which grouped text will 448 * be extracted 449 * 450 * @return A String comprising all of the non-null matched 451 * groups concatenated together 452 */ 453 public static final String concatGroups(Matcher matcher) { 454 StringBuilder b = new StringBuilder(); 455 final int numGroups = matcher.groupCount(); 456 457 for (int i = 1; i <= numGroups; i++) { 458 String s = matcher.group(i); 459 460 if (s != null) { 461 b.append(s); 462 } 463 } 464 465 return b.toString(); 466 } 467 468 /** 469 * Convenience method to return only the digits and plus signs 470 * in the matching string. 
471 * 472 * @param matcher The Matcher object from which digits and plus will 473 * be extracted 474 * 475 * @return A String comprising all of the digits and plus in 476 * the match 477 */ 478 public static final String digitsAndPlusOnly(Matcher matcher) { 479 StringBuilder buffer = new StringBuilder(); 480 String matchingRegion = matcher.group(); 481 482 for (int i = 0, size = matchingRegion.length(); i < size; i++) { 483 char character = matchingRegion.charAt(i); 484 485 if (character == '+' || Character.isDigit(character)) { 486 buffer.append(character); 487 } 488 } 489 return buffer.toString(); 490 } 491 492 /** 493 * Do not create this static utility class. 494 */ 495 private Patterns() {} 496 }
调用方法
// Wrap every URL found in `message` in an <a> tag, in one left-to-right pass.
// The result is built with appendReplacement instead of String.replaceAll(link, ...):
// replaceAll would interpret the URL as a regex ('.', '?', '+', '(' ...) and would
// also rewrite copies of the URL inside anchors that were already inserted.
Matcher matcher = Patterns.WEB_URL.matcher(message);
StringBuffer linked = new StringBuffer();
while (matcher.find()) {
    String link = matcher.group();
    // '&' and '\'' are the only characters the pattern can match that are unsafe
    // inside a single-quoted HTML attribute, so escape exactly those.
    String safeLink = link.replace("&", "&amp;").replace("'", "&#39;");
    String restr = "<a href='" + safeLink + "' target='_blank'>" + safeLink + "</a>";
    // quoteReplacement keeps '$' and '\\' in the URL from being read as group references.
    matcher.appendReplacement(linked, Matcher.quoteReplacement(restr));
}
matcher.appendTail(linked);
message = linked.toString();
说明:
1、message为要进行提取的字符串
2、以 http/https 开头的链接可以正确提取,但链接结尾必须用空格等分隔符与后面的文字隔开,否则提取结果不准确:WEB_URL 的域名和路径部分允许出现中文等非 ASCII 字符,紧跟在链接后面的文字会被一并当作链接的一部分
即:"你真的是https://www.cnblogs.com/pxblog博客网"会获取到"https://www.cnblogs.com/pxblog博客网"
"你真的是https://www.cnblogs.com/pxblog 博客网"会获取到"https://www.cnblogs.com/pxblog"