zoukankan      html  css  js  c++  java
  • C#快速获取指定网页源码的几种方式,并通过字符串截取函数 或 正则 取指定内容(IP)

    // Fetch the page source only from the beginning up to the closing </title> tag (test code).
    // In the original author's testing, this first approach was slightly faster.
    string url = "http://www.ip.cn";
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.ContentType = "application/x-www-form-urlencoded";

    // Marker that ends the download early.
    const string endMarker = "</title>";

    // StringBuilder avoids re-copying all accumulated text on every chunk.
    StringBuilder pageHead = new StringBuilder();

    // The using blocks release the response, its stream and the reader even if a read throws.
    using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
    using (Stream ReceiveStream = res.GetResponseStream())
    using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
    {
        char[] read = new char[256];
        int count;
        while ((count = sr.Read(read, 0, read.Length)) > 0)
        {
            // Scan only the new chunk plus enough of the previous tail to catch
            // a marker that was split across two reads.
            int scanFrom = Math.Max(0, pageHead.Length - (endMarker.Length - 1));
            pageHead.Append(read, 0, count);

            string tail = pageHead.ToString(scanFrom, pageHead.Length - scanFrom);
            if (tail.IndexOf(endMarker, StringComparison.Ordinal) != -1)
            {
                // Stop before issuing another (blocking) read once the title has been seen.
                break;
            }
        }
    }

    string strResult = pageHead.ToString();
    textBoxTest.Text = strResult;
    
    
    // Second approach: download the page source with WebClient.

    string pageHtml;
    // WebClient is IDisposable; the using block releases it once the download has finished.
    using (WebClient MyClient = new WebClient())
    {
        MyClient.Credentials = CredentialCache.DefaultCredentials;
        // No explicit Host header: the original hard-coded "www.kuwo.cn", which is unrelated to
        // the address held in `url`. Leaving it out lets the Host be derived from the request URL.
        MyClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36");
        Byte[] pageData = MyClient.DownloadData(url);
        // Decode as UTF-8. For a page served as GB2312, decode with that encoding instead
        // (the original suggested Encoding.Default.GetString(pageData) for that case).
        pageHtml = Encoding.UTF8.GetString(pageData);
    }
    
    
    
    /// <summary>
    /// Extracts the text that lies between two delimiter strings, e.g. from page source.
    /// </summary>
    /// <param name="TxtStr">Text to search.</param>
    /// <param name="FirstStr">Delimiter that precedes the wanted text.</param>
    /// <param name="SecondStr">Delimiter that follows the wanted text.</param>
    /// <returns>
    /// The text between the first occurrence of <paramref name="FirstStr"/> and the next
    /// occurrence of <paramref name="SecondStr"/> after it. Returns an empty string when any
    /// argument is null, when either delimiter is not found, or when
    /// <paramref name="SecondStr"/> occurs inside <paramref name="FirstStr"/>.
    /// </returns>
    private string GetStr(string TxtStr, string FirstStr, string SecondStr)
    {
        // Treat missing input like "nothing found" instead of failing with a NullReferenceException.
        if (TxtStr == null || FirstStr == null || SecondStr == null)
        {
            return "";
        }

        // Kept from the original for backward compatibility: delimiter pairs where SecondStr is
        // contained in FirstStr (including identical delimiters or an empty SecondStr) yield "".
        if (FirstStr.IndexOf(SecondStr, StringComparison.Ordinal) != -1)
        {
            return "";
        }

        int firstSite = TxtStr.IndexOf(FirstStr, StringComparison.Ordinal);
        if (firstSite == -1)
        {
            return "";
        }

        // Look for the closing delimiter only after the opening one has ended; starting one
        // character after its beginning could match text overlapping FirstStr and produce a
        // negative length for Substring.
        int contentStart = firstSite + FirstStr.Length;
        int secondSite = TxtStr.IndexOf(SecondStr, contentStart, StringComparison.Ordinal);
        if (secondSite == -1)
        {
            return "";
        }

        return TxtStr.Substring(contentStart, secondSite - contentStart);
    }
    
    // Regex extraction: take the shortest run of text that lies between "A" and "B".
    // The lookbehind/lookahead keep the two markers themselves out of the result.
    Match betweenMarkers = Regex.Match(title, "(?<=" + "A" + ").*?(?=" + "B" + ")");
    string title2 = betweenMarkers.Value;
    // The same pattern written as a single literal:
    //Regex.Match(sUrl, "(?<=A).*?(?=B)").Value;
    
     //Custom helper: download a page and extract the text between two markers
    /// <summary>
    /// Requests <paramref name="url"/> with HTTP GET, reads the response as UTF-8 text until
    /// <paramref name="endTag"/> has been seen (or the response ends), and returns the first
    /// stretch of text found between <paramref name="startStr"/> and <paramref name="endStr"/>.
    /// </summary>
    /// <param name="url">Address of the page to request.</param>
    /// <param name="endTag">Marker that stops the download once it appears in the text read so far.</param>
    /// <param name="startStr">
    /// Left delimiter. It is inserted verbatim into a regex lookbehind, so regex metacharacters
    /// in it are interpreted as pattern syntax; pass an escaped value for literal text.
    /// </param>
    /// <param name="endStr">
    /// Right delimiter. It is inserted verbatim into a regex lookahead, with the same caveat.
    /// </param>
    /// <returns>The matched text, or an empty string when the pattern does not match.</returns>
    /// <remarks>Any exception is logged through LogAdd and then rethrown to the caller.</remarks>
    private string MyGetTitle(string url, string endTag, string startStr, string endStr)
    {
        try
        {
            if (endTag == null)
            {
                throw new ArgumentNullException("endTag");
            }

            // WebRequest.Create returns the base type; the cast yields the HTTP-specific request.
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            req.ContentType = "text/html;charset=UTF-8";

            // StringBuilder avoids re-copying all accumulated text on every chunk.
            StringBuilder pageText = new StringBuilder();
            // Number of already-scanned characters to re-check so that an endTag split
            // across two reads is still detected.
            int overlap = Math.Max(endTag.Length - 1, 0);

            // The using blocks release the response, its stream and the reader on every path,
            // including when GetResponseStream or Read throws.
            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream ReceiveStream = res.GetResponseStream())
            using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
            {
                char[] read = new char[256];
                int count;
                // Read returns the number of characters placed in the buffer; 0 means end of stream.
                while ((count = sr.Read(read, 0, read.Length)) > 0)
                {
                    int scanFrom = Math.Max(0, pageText.Length - overlap);
                    pageText.Append(read, 0, count);

                    string tail = pageText.ToString(scanFrom, pageText.Length - scanFrom);
                    if (tail.IndexOf(endTag, StringComparison.Ordinal) != -1)
                    {
                        // Stop before issuing another (blocking) read once the end marker is present.
                        break;
                    }
                }
            }

            return Regex.Match(pageText.ToString(), "(?<=" + startStr + ").*?(?=" + endStr + ")").Value;
        }
        catch (Exception ex)
        {
            LogAdd(ListBoxDownLog, "异常:" + ex.Message);
            throw;
        }
    }
  • 相关阅读:
    Linux下Fortran多文件编译
    java用poi实现对word读取和修改操作
    SQL DATEDIFF语法及时间函数 Sql 查询当天、本周、本月记录
    深入Java集合学习系列:LinkedHashSet的实现原理
    Log4Net日志
    程序员创业如何才能成功?
    Asp.net 数据采集基类(远程抓取,分解,保存,匹配)
    response.setContentType()的String参数及对应类型
    深入Java集合学习系列:LinkedHashMap的实现原理
    深入Java集合学习系列:HashSet的实现原理
  • 原文地址:https://www.cnblogs.com/tmdsleep/p/5849498.html
Copyright © 2011-2022 走看看