zoukankan      html  css  js  c++  java
  • C#收集网页中的EMail实现源码

    C#收集网页中的EMail实现源码:

         //Call
            /// <summary>
            /// Starts a new thread that runs GetEmailAddress with <paramref name="urlStr"/> as its argument.
            /// The rest of the body (link handling) is elided in this listing.
            /// </summary>
            /// <param name="urlStr">Page address handed to the worker thread.</param>
            private void GetAllURL(string urlStr)
            {
                // One new thread per call; urlStr travels as the ParameterizedThreadStart argument.
                new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr); 
                   ...    // process the links found in the page (omitted in the original listing)
                  }
            /**//// <summary>
            /// 提取网页中的Eamil
            /// </summary>
            /// <param name="urlStr">网页地址</param>
            private void GetEmailAddress(object urlStr)
            {
                ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)");   //得到Email
                foreach (object tmp in EmailStrs)
                {
                    Invoke(new AppendTextDelegate(AppendText), new object[] { tmp + "\r\n" });
                }
            }

            private ArrayList GetWebInfo(string URlStr,string RegExpress)
            {
                //打开指定页
                HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
                webRequest1.Method = "GET";
                HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse();
                String textData = new StreamReader(response.GetResponseStream(), Encoding.Default).ReadToEnd();


                //用正则表达式,提取指定内容,带一个变量
                Regex r;
                Match m;
                r = new Regex(RegExpress,   //@"copyTitle.\'(?<AdInfo>.*)\'",
                    RegexOptions.IgnoreCase | RegexOptions.Compiled);
                int pos1=RegExpress.IndexOf("(?<");
                int pos2=RegExpress.IndexOf(">",pos1);
                string DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
                string  AdStr = "";
                ArrayList Result = new ArrayList();
                for (m = r.Match(textData); m.Success; m = m.NextMatch())
                {
                    AdStr = m.Result("${" + DestionKey + "}").Trim();   //地址
                    Result.Add(AdStr);
                }
                return Result;
            }

    上述代码中的关键是书写提取EMail的表达式(字符类中只写了大写字母,依赖 GetWebInfo 中的 RegexOptions.IgnoreCase 才能匹配小写地址):
                   @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"

  • 相关阅读:
    Android中的数据结构
    Android之makefile
    AndroidMainfest详解
    源码分析之AsyncTask
    源码分析之AsyncTask
    优化apk的odex处理
    arguments简单函数 求整数递加和
    js提取DOM属性和设置DOM属性值
    var定义变量的使用细节
    关于ID命名 一个页面唯一
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656714.html
Copyright © 2011-2022 走看看