// Test goal: download only the beginning of a page's source, up to its <title> element.

// Method 1: stream the response in small chunks and stop at "</title>".
// (Original author's note: measured as slightly faster than method 2.)
string url = "http://www.ip.cn";
HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
req.Method = "GET";
req.ContentType = "application/x-www-form-urlencoded";
string strResult = "";
// The using blocks release the response, stream and reader even if reading throws.
using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
using (Stream ReceiveStream = res.GetResponseStream())
using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
{
    char[] read = new char[256];
    int count;
    while ((count = sr.Read(read, 0, read.Length)) > 0)
    {
        strResult += new string(read, 0, count);
        // Check before reading again so no extra chunk is fetched once the title is complete.
        if (strResult.IndexOf("</title>", StringComparison.Ordinal) != -1)
        {
            break;
        }
    }
}
textBoxTest.Text = strResult;

// Method 2: download the whole page with WebClient.
string pageHtml;
using (WebClient MyClient = new WebClient())
{
    MyClient.Credentials = CredentialCache.DefaultCredentials;
    // NOTE(review): the Host header names www.kuwo.cn while `url` points at www.ip.cn —
    // looks like a copy/paste leftover; confirm which host is intended.
    MyClient.Headers.Add("Host", "www.kuwo.cn");
    MyClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36");
    byte[] pageData = MyClient.DownloadData(url);
    // Decode as UTF-8. The original author's alternative for GB2312 pages was
    // Encoding.Default.GetString(pageData), which depends on the system code page.
    pageHtml = Encoding.UTF8.GetString(pageData);
}

/// <summary>
/// Extracts the text located between the first occurrence of <paramref name="FirstStr"/>
/// and the next occurrence of <paramref name="SecondStr"/> after it.
/// </summary>
/// <param name="TxtStr">Text to search, e.g. page source.</param>
/// <param name="FirstStr">Marker that precedes the wanted text.</param>
/// <param name="SecondStr">Marker that follows the wanted text.</param>
/// <returns>
/// The text between the two markers, or an empty string when any argument is null,
/// when <paramref name="FirstStr"/> contains <paramref name="SecondStr"/>, or when
/// either marker is not found.
/// </returns>
private string GetStr(string TxtStr, string FirstStr, string SecondStr)
{
    if (TxtStr == null || FirstStr == null || SecondStr == null)
    {
        return "";
    }

    // Markers where the first contains the second are rejected, as in the original.
    if (FirstStr.IndexOf(SecondStr, StringComparison.Ordinal) != -1)
    {
        return "";
    }

    int FirstSite = TxtStr.IndexOf(FirstStr, StringComparison.Ordinal);
    if (FirstSite == -1)
    {
        return "";
    }

    // Search for the closing marker only after the opening marker ends; starting at
    // FirstSite + 1 could match a marker overlapping FirstStr and yield a negative length.
    int contentStart = FirstSite + FirstStr.Length;
    int SecondSite = TxtStr.IndexOf(SecondStr, contentStart, StringComparison.Ordinal);
    if (SecondSite == -1)
    {
        return "";
    }

    return TxtStr.Substring(contentStart, SecondSite - contentStart);
}

// Regex alternative: capture the text between "A" and "B" using look-behind / look-ahead.
// With literal markers the pattern is simply "(?<=A).*?(?=B)".
string title2 = Regex.Match(title, "(?<=" + "A" + ").*?(?=" + "B" + ")").Value;

/// <summary>
/// Downloads the page at <paramref name="url"/> chunk by chunk until
/// <paramref name="endTag"/> has been received (or the stream ends), then returns the
/// text found between <paramref name="startStr"/> and <paramref name="endStr"/>.
/// </summary>
/// <param name="url">Address of the page to request with HTTP GET.</param>
/// <param name="endTag">Text that stops the download once it appears in the data read so far.</param>
/// <param name="startStr">
/// Pattern preceding the wanted text. It is inserted into the regular expression
/// verbatim, so regex metacharacters in it are interpreted, not matched literally.
/// </param>
/// <param name="endStr">Pattern following the wanted text; also inserted verbatim.</param>
/// <returns>The first (lazy) match between the two patterns, or an empty string when nothing matches.</returns>
/// <remarks>Any exception is logged through <c>LogAdd</c> and then rethrown unchanged.</remarks>
private string MyGetTitle(string url, string endTag, string startStr, string endStr)
{
    try
    {
        // HttpWebRequest has no public constructor; it is obtained from WebRequest.Create and cast.
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
        req.Method = "GET";
        req.ContentType = "text/html;charset=UTF-8";

        string strResult = "";

        // Dispose the response, its stream and the reader on every path, including
        // failures while reading; the original released them only on success.
        using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
        using (Stream ReceiveStream = res.GetResponseStream())
        using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
        {
            char[] read = new char[256];
            int count;

            // Read returns the number of characters placed in the buffer; 0 means end of stream.
            while ((count = sr.Read(read, 0, read.Length)) > 0)
            {
                strResult += new string(read, 0, count);

                // Stop as soon as the end tag is present, before requesting more data.
                if (strResult.IndexOf(endTag, StringComparison.Ordinal) != -1)
                {
                    break;
                }
            }
        }

        return Regex.Match(strResult, "(?<=" + startStr + ").*?(?=" + endStr + ")").Value;
    }
    catch (Exception ex)
    {
        LogAdd(ListBoxDownLog, "异常:" + ex.Message);
        throw;
    }
}