zoukankan      html  css  js  c++  java
  • HttpWebRequest及正则表达式

          

     近日需要采集某个网页的内容,并提取其中所有链接的地址和标题。

     其中用到了HttpWebRequest和正则表达式,代码备忘如下:
     

     //WebClient wc = new WebClient();
    //NetworkCredential nc = new NetworkCredential("用户名", "密码", "域名");
            
    //wc.Credentials = nc;
            
    //Response.Write(Server.HtmlEncode(wc.DownloadString("地址")));

            HttpWebRequest req 
    = (HttpWebRequest)WebRequest.Create("地址");
            req.Credentials 
    = new NetworkCredential("用户名""密码""域名");
            req.Method 
    = "GET";        
            IAsyncResult ir 
    = req.BeginGetResponse(nullnull);
            ir.AsyncWaitHandle.WaitOne();
            
    try {
                HttpWebResponse response1 
    = (HttpWebResponse)req.EndGetResponse(ir);
                System.IO.Stream stream 
    = response1.GetResponseStream();
                sReader 
    = new System.IO.StreamReader(stream, System.Text.Encoding.GetEncoding("GB2312"));
                
    if (null != sReader) {
                
    string pattern = @"<a(?:\s*?)href=['|""](?<url>[\s\S]+?)['|""]>(?<title>[\s\S]+?)</a>";
                System.Text.RegularExpressions.MatchCollection matchs 
    = System.Text.RegularExpressions.Regex.Matches(sReader.ReadToEnd(), pattern);
                
    if (matchs.Count <= 0)
                    Response.Write(
    "没有匹配项");
                
    else
                
    {
                    
    for(int i=0;i<50;i++)
                    
    {
                        Response.Write(
    "链接:" + matchs[i].Groups["url"].Value+"___名称:"+matchs[i].Groups["title"].Value+"<br />");
                    }

                }

                }

            }

            
    catch (System.Exception ex) {
                Response.Write(ex.Message);
            }

            
    finally {
                
    if (null != sReader) {
                    sReader.Dispose();
                }

            }

    这其中,正则表达式让我迷糊了一会儿:起初没有使用惰性匹配(即 +?),贪婪匹配会从第一个 <a 一直匹配到最后一个 </a>,导致每次都只能匹配到一条信息……

  • 相关阅读:
    BEC listen and translation exercise 44
    中译英12
    BEC listen and translation exercise 43
    中译英11
    BEC listen and translation exercise 42
    中译英10
    BEC listen and translation exercise 41
    中译英9
    BEC listen and translation exercise 40
    中译英8
  • 原文地址:https://www.cnblogs.com/McJeremy/p/1495475.html
Copyright © 2011-2022 走看看