首先是两个读取 HTML 的方法,分别对应两种编码格式(UTF-8 和 GB2312),实现方式也不同。两个方法都可以使用,但第二个方法目前会出现乱码,有待处理。
/// <summary>
/// Downloads http://www.bing.com, decodes the response bytes as UTF-8,
/// saves a copy to F:\Desktop\txt2.html and returns the decoded HTML.
/// </summary>
/// <returns>
/// The page HTML on success. If the download raises a <see cref="WebException"/>,
/// the exception message is returned instead of HTML.
/// </returns>
private static string ReturnUTF8Html()
{
    try
    {
        // WebClient is IDisposable; the using block releases it even when the download throws.
        using (WebClient client = new WebClient())
        {
            // Use the application's default system credentials for request authentication.
            client.Credentials = CredentialCache.DefaultCredentials;

            // Fetch the raw bytes of the page.
            byte[] pageData = client.DownloadData("http://www.bing.com");

            // Decode as UTF-8; a page served as GB2312 would need a different encoding here.
            string pageHtml = Encoding.UTF8.GetString(pageData);

            // Keep a local copy of the downloaded page.
            using (StreamWriter sw = new StreamWriter("F:\\Desktop\\txt2.html"))
            {
                sw.Write(pageHtml);
            }

            return pageHtml;
        }
    }
    catch (WebException webEx)
    {
        // Callers receive the error text in place of the HTML.
        return webEx.Message.ToString();
    }
}
/// <summary>
/// Downloads http://www.ithome.com/, decodes the response as GB2312,
/// saves a copy to F:\Desktop\txt.html and returns the decoded HTML.
/// </summary>
/// <returns>The page content decoded with the "gb2312" encoding.</returns>
public static string ReturnGB2312Html()
{
    string url = "http://www.ithome.com/";
    Encoding pageEncoding = Encoding.GetEncoding("gb2312");
    string html;

    // Dispose the response, its stream and the reader even if reading throws.
    using (WebResponse response = WebRequest.Create(url).GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, pageEncoding))
    {
        html = reader.ReadToEnd();
    }

    // Save the page. FileMode.Create truncates an existing file, so a shorter
    // page cannot leave stale bytes from a previous, longer download behind.
    using (FileStream fileStream = new FileStream(@"F:\Desktop\txt.html", FileMode.Create, FileAccess.Write))
    {
        // Write with the same encoding used for decoding, so the saved bytes
        // do not depend on the machine's default code page.
        byte[] buffer = pageEncoding.GetBytes(html);
        fileStream.Write(buffer, 0, buffer.Length);
    }

    return html;
}
抓取文件
/// <summary>
/// Downloads the resource at <paramref name="strUrl"/> and writes it to the
/// file named by <c>Program.path</c>.
/// </summary>
/// <param name="strUrl">Absolute URL of the file to download.</param>
/// <returns>
/// "保存成功" when the file was written; an empty string when the URL is
/// null/empty or any step of the download fails.
/// </returns>
public static string 抓取文件(string strUrl)
{
    string strMsg = string.Empty;

    // Nothing to download (e.g. the caller's regex did not match).
    if (string.IsNullOrEmpty(strUrl))
    {
        return strMsg;
    }

    try
    {
        WebRequest request = WebRequest.Create(strUrl);

        // using blocks release the response and both streams even when a read or write throws.
        using (WebResponse response = request.GetResponse())
        using (Stream reader = response.GetResponseStream())
        {
            // Make sure the target folder exists before opening the output file.
            string directory = Path.GetDirectoryName(Program.path);
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }

            // FileMode.Create truncates an existing file so no stale bytes remain after the new content.
            using (FileStream writer = new FileStream(Program.path, FileMode.Create, FileAccess.Write))
            {
                byte[] buff = new byte[4096];
                int c; // number of bytes actually read in this pass
                while ((c = reader.Read(buff, 0, buff.Length)) > 0)
                {
                    writer.Write(buff, 0, c);
                }
            }
        }

        strMsg = "保存成功";
    }
    catch (Exception ex)
    {
        // Best-effort download: keep returning an empty string on failure,
        // but report the reason instead of swallowing it silently.
        Console.Error.WriteLine(ex.Message);
    }

    return strMsg;
}
入口
/// <summary>
/// Entry point: reads the Bing home page, extracts the 1920x1080 wallpaper URL
/// with a regular expression and downloads that image.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("正在读取网页...");
    // ReturnUTF8Html returns the exception message instead of HTML when the
    // download fails, so a failed read shows up below as "no match".
    string html = ReturnUTF8Html();
    Console.WriteLine("网页读取正常...");
    Console.WriteLine("正在匹配正则表达式...");

    // Dots are escaped so they only match a literal '.' in the host name and extension.
    Match match = Regex.Match(html, @"http://s\.cn\.bing\.net/az/hprichbg/rb/[a-zA-Z]+_ZH-CN[0-9]{11}_1920x1080\.jpg");

    if (!match.Success)
    {
        // Without a URL there is nothing to download.
        Console.WriteLine("未找到匹配的图片地址。");
        Thread.Sleep(3000);
        return;
    }

    Console.WriteLine(match.Value);

    // 抓取文件 returns an empty string when the download fails.
    string result = Program.抓取文件(match.Value);
    Console.WriteLine(string.IsNullOrEmpty(result) ? "下载失败!" : "下载完毕!");

    // Leave the console output visible for a moment before the process exits.
    Thread.Sleep(3000);
}
路径
// Destination file for the downloaded image, stamped with the start-up time.
// The time part uses '-' separators because ':' is not allowed in Windows file
// names, and "mm" keeps the minutes zero-padded; the invariant culture keeps
// the digits and calendar independent of the machine's regional settings.
public static string path = "E:\\background\\Bing\\bing" + DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ".jpg";