/// <summary>
/// Downloads the body of <paramref name="url"/> and returns it as a string.
/// </summary>
/// <param name="url">Absolute HTTP/HTTPS URL to fetch.</param>
/// <returns>The full response body decoded with <see cref="Encoding.Default"/>.</returns>
/// <exception cref="InvalidCastException">The URL scheme does not produce an HTTP request.</exception>
/// <exception cref="WebException">The request failed or timed out.</exception>
static string GetHtml(string url)
{
    const int timeoutMs = 16 * 1000;

    // A direct cast fails loudly on a non-HTTP URL instead of yielding a null
    // that would only surface later as a NullReferenceException.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Timeout = timeoutMs;

    // The response, its stream and the reader are all disposed even when reading
    // throws; an undisposed response keeps its connection checked out.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    // NOTE(review): Encoding.Default is platform-dependent; kept to preserve the
    // original decoding behavior.
    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Extracts the title and the first <c>$</c>-delimited token for a player page.
/// </summary>
/// <remarks>
/// The player page references a script under <c>/playdata/</c>; that script is
/// downloaded via <see cref="GetHtml"/> and the first text enclosed between two
/// <c>$</c> characters is taken from it.
/// </remarks>
/// <param name="htmlStr">HTML of the player page.</param>
/// <returns>A string of the form <c>title:token</c>.</returns>
static string FiltHtml(string htmlStr)
{
    // Path fragment of the play-data script, i.e. whatever follows "/playdata/" in the script tag.
    string scriptPath = Regex.Match(
        htmlStr,
        "(?<=<script type=\"text/javascript\" src=\"/playdata/).*?(?=\"></script>)").Value;

    // Text between the fixed title prefix and the "xxx</title>" suffix.
    string title = Regex.Match(htmlStr, "(?<=<title>正在播放).*?(?=xxx</title>)").Value;

    string playData = GetHtml(string.Format("http://xxx.com/playdata/{0}", scriptPath));

    // First run of characters enclosed by a pair of '$' characters.
    string token = Regex.Match(playData, @"(?<=\$).*?(?=\$)").Value;

    return string.Format("{0}:{1}", title, token);
}

/// <summary>
/// Walks a fixed range of player pages, appending one result line per page to
/// <c>D:\g.txt</c> and echoing it to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int startNum = 787;
    const int pageCount = 3000;
    const int delayMs = 2000;

    // The using block guarantees the file is flushed and closed even if an
    // exception escapes the loop (for example a write failure in the catch block).
    using (StreamWriter sw = new StreamWriter(@"D:\g.txt", true, Encoding.Unicode))
    {
        sw.AutoFlush = true;

        for (int i = 0; i < pageCount; i++)
        {
            int pageId = startNum + i;
            try
            {
                string startUrl = string.Format("http://xxx.com/player/index{0}-0-0.html", pageId);
                string wildHtml = GetHtml(startUrl);
                string oneData = string.Format("{0}:{1}", pageId, FiltHtml(wildHtml));

                sw.WriteLine(oneData);
                Console.WriteLine(oneData);

                // Pause between successful requests, as the original did.
                System.Threading.Thread.Sleep(delayMs);
            }
            catch (Exception)
            {
                // Best effort: record the failed page and carry on with the next one.
                string oneData = string.Format("{0}:出错了", pageId);
                Console.WriteLine(oneData);
                sw.WriteLine(oneData);
            }
        }
    }

    Console.ReadKey();
}
代码中的域名都用 xxx 代替了。
仅供研究学习使用，请多注意身体！
此篇文章可随意分发、拷贝、传阅。
不要注我的名字,谢谢。