zoukankan      html  css  js  c++  java
  • URL 正则表达式,全【转】

    由于工作需要写了一个包括所有Url的正则表达式,用来验证返回的Url是否符合RFC1738规定。

    有兴趣的同学可以去看RFC1738关于Url部分的介绍(http://www.ietf.org/rfc/rfc1738.txt),本文中的代码是按其规定编写的。

    在没有了解RFC1738的时候,一直以为Url的正则表达式很简单,没想到Url有这么多分类,更没想到一个普通的http的正则表达式也不是那么简单。

    以下是我搜到的关于http的正则表达式:

    http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?


    当然这已经满足大部分人的需求了,但是如果需要严格的验证的话还是要符合RFC1738了。

    Url包括Http,Ftp,News,Nntpurl,Telnet,Gopher,Wais,Mailto,File,Prosperourl和Otherurl。

    呵呵,废话不多说了,上代码

    #region Http
                // Regex fragments for the RFC 1738 grammar, composed bottom-up into httpurl.
                // Each fragment is an ordinary pattern string and none of them is anchored.

                // --- character classes ---
                string lowalpha = @"[a-z]";
                string hialpha = @"[A-Z]";
                string alpha = "(" + lowalpha + "|" + hialpha + ")";
                string digit = @"[0-9]";
                string safe = @"(\$|-|_|\.|\+)";
                string extra = @"(!|\*|'|\(|\)|,)";
                string hex = "(" + digit + "|A|B|C|D|E|F|a|b|c|d|e|f)";
                // A percent-escape: '%' followed by two hex digits.
                string escape = "(%" + hex + hex + ")";
                string unreserved = "(" + string.Join("|", new string[] { alpha, digit, safe, extra }) + ")";
                string uchar = "(" + unreserved + "|" + escape + ")";
                string reserved = @"(;|/|\?|:|@|&|=)";
                string xchar = "(" + string.Join("|", new string[] { unreserved, reserved, escape }) + ")";
                // One or more decimal digits.
                string digits = "(" + digit + "+)";

                // --- host and port ---
                string alphadigit = "(" + alpha + "|" + digit + ")";
                // A label is a single alphadigit, or starts and ends with one and may
                // contain '-' in between.
                string domainlabel = "(" + alphadigit + "|" + alphadigit + "(" + alphadigit + "|-)*" + alphadigit + ")";
                // Same shape as domainlabel, but the first character must be a letter.
                string toplabel = "(" + alpha + "|" + alpha + "(" + alphadigit + "|-)*" + alphadigit + ")";
                string hostname = "((" + domainlabel + @"\.)*" + toplabel + ")";
                // Four dot-separated digit runs; left ungrouped because it is only used
                // as the last alternative inside the host group below.
                string hostnumber = string.Join(@"\.", new string[] { digits, digits, digits, digits });
                string host = "(" + hostname + "|" + hostnumber + ")";
                string port = digits;
                // host, optionally followed by ":port".
                string hostport = "(" + host + "(:" + port + "){0,1})";

                // --- path and query ---
                string hsegment = "((" + uchar + "|;|:|@|&|=)*)";
                string search = "((" + uchar + "|;|:|@|&|=)*)";
                // Segments separated by '/'; a segment may be empty.
                string hpath = hsegment + "(/" + hsegment + ")*";

                // "http://" hostport [ "/" hpath [ "?" search ] ]
                string httpurl = "http://" + hostport + "(/" + hpath + @"(\?" + search + "){0,1}){0,1}";
                #endregion

    #region Ftp
                // RFC 1738: user / password = *[ uchar | ";" | "?" | "&" | "=" ]
                string user = "((" + uchar + @"|;|\?|&|=)*)";
                string password = "((" + uchar + @"|;|\?|&|=)*)";
                // Optional "user[:password]@" prefix followed by hostport.
                string login = "((" + user + "(:" + password + "){0,1}@){0,1}" + hostport + ")";
                string fsegment = "((" + uchar + @"|\?|:|@|&|=)*)";
                // Transfer type letter, either case.
                string ftptype = @"(A|I|D|a|i|d)";
                // Segments separated by '/'; a segment may be empty.
                string fpath = "(" + fsegment + "(/" + fsegment + ")*)";
                // "ftp://" login [ "/" fpath [ ";type=" ftptype ] ]
                string ftpurl = "ftp://" + login + "(/" + fpath + "(;type=" + ftptype + "){0,1}){0,1}";
                #endregion

     

    #region News
                // A group name starts with a letter, followed by letters, digits or one of - . + _
                string group = "(" + alpha + "(" + alpha + "|" + digit + @"|-|\.|\+|_)*)";
                // A message id: one or more allowed characters, then '@' and a host.
                string article = "((" + uchar + @"|;|/|\?|:|&|=)+@" + host + ")";
                // Either the literal '*', a group name, or an article id.
                string grouppart = @"(\*|" + group + "|" + article + ")";
                string newsurl = "(news:" + grouppart + ")";
                #endregion

    #region Nntpurl
                // "nntp://" hostport "/" group [ "/" digits ]
                string nntpurl = "nntp://" + hostport + "/" + group + "(/" + digits + "){0,1}";
                #endregion

    #region Telnet
                // "telnet://" login, with an optional trailing '/'.
                string telneturl = "telnet://" + login + "/{0,1}";
                #endregion

    #region Gopher
                // The gopher type is a single xchar; selector and gopher+ string are runs of xchar.
                string gtype = xchar;
                string selector = "(" + xchar + "*)";
                string gopherplus_string = "(" + xchar + "*)";

                // The optional tail is nested: each later part may only appear when the part
                // before it is present. It is assembled inside-out so each nesting level is
                // visible; "%09" is the escaped tab that separates the parts.
                string gopherPlusTail = "(%09" + gopherplus_string + "){0,1}";
                string gopherSearchTail = "(%09" + search + gopherPlusTail + "){0,1}";
                string gopherSelectorTail = "(" + selector + gopherSearchTail + "){0,1}";
                string gopherTypeTail = "(" + gtype + gopherSelectorTail + "){0,1}";
                string gopherurl = "gopher://" + hostport + "(/" + gopherTypeTail + "){0,1}";
                #endregion

    复制代码

    #region Wais
                // RFC 1738: database, wtype and wpath are each *uchar (possibly empty).
                string database = String.Format(@"({0}*)", uchar);
                string wtype = String.Format(@"({0}*)", uchar);
                string wpath = String.Format(@"({0}*)", uchar);

                // The three WAIS URL forms: a database, a search against a database,
                // and a single document inside a database.
                string waisdatabase = String.Format(@"(wais://{0}/{1})", hostport, database);
                string waisindex = String.Format(@"(wais://{0}/{1}\?{2})", hostport, database, search);
                string waisdoc = String.Format(@"(wais://{0}/{1}/{2}/{3})", hostport, database, wtype, wpath);

                // The alternation is wrapped in a group. As a bare "A|B|C", anything
                // concatenated around the pattern (for example "^" + waisurl + "$") would
                // bind only to the first and last alternatives.
                string waisurl = String.Format(@"({0}|{1}|{2})", waisdatabase, waisindex, waisdoc);
                #endregion

    #region Mailto
                // The address part is one or more xchar.
                string encoded822addr = "(" + xchar + "+)";
                string mailtourl = "mailto:" + encoded822addr;
                #endregion

    #region File
                // "file://", then an optional host or the literal "localhost", then "/" and an fpath.
                string fileurl = "file://(" + host + "{0,1}|localhost)/" + fpath;
                #endregion

    #region Prosperourl
                // RFC 1738: fieldname / fieldvalue = *[ uchar | "?" | ":" | "@" | "&" ]
                // Both are zero-or-more characters. Without the trailing '*' each one matches
                // exactly one character, so a field such as ";owner=alice" is rejected.
                string fieldname = String.Format(@"(({0}|\?|:|@|&)*)", uchar);
                string fieldvalue = String.Format(@"(({0}|\?|:|@|&)*)", uchar);

                // A field is ";name=value".
                string fieldspec = String.Format(@"(;{0}={1})", fieldname, fieldvalue);

                // Path segments may additionally contain '='; segments are separated by '/'.
                string psegment = String.Format(@"(({0}|\?|:|@|&|=)*)", uchar);
                string ppath = String.Format(@"({0}(/{0})*)", psegment);

                // "prospero://" hostport "/" ppath *[ fieldspec ]
                string prosperourl = String.Format(@"prospero://{0}/{1}({2})*", hostport, ppath, fieldspec);
                #endregion

    #region Otherurl
                // Any scheme not covered by the dedicated patterns is matched with the
                // generic URL form, so otherurl is just an alias for genericurl.
                string urlpath = "((" + xchar + ")*)";
                // One or more lowercase letters, digits, '+', '-' or '.'.
                string scheme = "((" + lowalpha + "|" + digit + @"|\+|-|\.)+)";
                // "//" login, optionally followed by "/" and a urlpath.
                string ip_schemepar = "(//" + login + "(/" + urlpath + "){0,1})";
                // Either a free run of xchar or the "//login/path" form above.
                string schemepart = "((" + xchar + ")*|" + ip_schemepar + ")";
                string genericurl = scheme + ":" + schemepart;
                string otherurl = genericurl;
                #endregion

    有了Pattern剩下的就简单多了,无非就是正则表达式的验证了,以Http为例:

    Http的pattern为string httpurl,假设要验证的Url为url,所以验证url的代码如下:

    // Anchor the pattern before validating. Regex.IsMatch succeeds when ANY substring
    // matches, so the bare pattern would accept input such as "junk http://a.b junk".
    // \A and \z pin the match to the whole string (unlike '$', \z does not tolerate a
    // trailing newline), and the (?: ) wrapper keeps the anchors applied to the whole
    // pattern even when it contains a top-level alternation.
    // ExplicitCapture makes the many plain parentheses in the pattern non-capturing;
    // the match result is unchanged because the pattern uses no backreferences.
    Regex regex = new Regex(@"\A(?:" + httpurl + @")\z", RegexOptions.ExplicitCapture);

    // A null url is reported as "not a match" instead of throwing ArgumentNullException.
    bool isMatchHttp = url != null && regex.IsMatch(url);


    转载:http://www.pin5i.com/showtopic-25932.html



    返回导读目录,阅读更多随笔



    分割线,以下为博客签名:

    软件臭虫情未了
    • 编码一分钟
    • 测试十年功


    随笔如有错误或不恰当之处、为希望不误导他人,望大侠们给予批评指正。

  • 相关阅读:
    【LeetCode & 剑指offer刷题】数组题18:Plus One
    SQL Server 2005 的动态管理视图DMV和函数DMF
    数据库SQL优化大总结之 百万级数据库优化方案
    误删SQL Server日志文件后怎样附加数据库
    教你建立SQL数据库的表分区
    Sql Server 阻塞的常见原因和解决办法
    SQL索引优化方法
    详解如何定义SQL Server外关键字约束
    写出高性能SQL语句的十三条法则
    SQL SERVER内部函数大全
  • 原文地址:https://www.cnblogs.com/08shiyan/p/1993844.html
Copyright © 2011-2022 走看看