最近在学习DevExpress,视频都在 http://tv.devexpress.com 上,一个一个下载又嫌麻烦,于是费尽千辛万苦写了个DEMO来半自动下载,相信还可以再完善。
遇到的最困难的一个问题是:如何判断C#的WebBrowser控件是否已经完成文档加载。参考了很多资料,配合某些特定的状态字段进行判断,完美解决了AJAX加载的文档内容的解析问题。
效果图如下:1、输入分类地址;2、点击下载,自动下载该分类下的视频。
C#代码
using System;
using System.Collections.Generic;
using System.IO;
using System.Windows.Forms;
using Yejq;

namespace devexpresstv
{
    /// <summary>
    /// Main form of the semi-automatic downloader. It loads a category page in the
    /// embedded WebBrowser, collects the ".movie" links found on it, then visits the
    /// movie pages one at a time and hands the first ".mp4" link of each page to
    /// <c>HttpDownLoad</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Root folder under which one sub-folder per downloaded movie is created.</summary>
        private const string DownloadRoot = @"G:\temp\";

        /// <summary>Trailing part of a movie page title that is cut off before naming the folder.</summary>
        private const string TitleSuffix = " - DevExpress Channel Movie";

        /// <summary>Link fragments identifying ".movie" entries that must not be downloaded.</summary>
        private static readonly string[] ExcludedFragments =
        {
            "ClinicalTrialASPNET",
            "DevelopMultiChannelApps",
            "DevToolsROIForrester",
            "SchedulingSnap"
        };

        /// <summary>Movie page addresses that still have to be visited, in download order.</summary>
        private readonly Queue<string> pendingMovies = new Queue<string>();

        /// <summary>
        /// Address of the movie page currently being loaded, or null when no movie page is
        /// awaited. Also serves as a guard so a page is processed only once even if
        /// DocumentCompleted is raised several times for it.
        /// </summary>
        private string currentMovie;

        /// <summary>Running counter used as a prefix of each download folder name.</summary>
        private int downTimes = 1;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts a new run: validates the category address typed by the user and loads it
        /// in the browser.
        /// </summary>
        private void button_downNow_Click(object sender, EventArgs e)
        {
            Uri categoryUri;
            if (!Uri.TryCreate(textBox_url.Text.Trim(), UriKind.Absolute, out categoryUri))
            {
                label_log.Text = "地址无效:" + textBox_url.Text;
                return;
            }

            // Drop whatever a previous run left behind so handlers never accumulate
            // across clicks and stale queue entries are not downloaded.
            pendingMovies.Clear();
            currentMovie = null;
            webBrowser1.DocumentCompleted -= webBrowser_DocumentCompleted_downMovice;
            webBrowser1.DocumentCompleted -= webBrowser_DocumentCompleted_downNow;

            webBrowser1.DocumentCompleted += webBrowser_DocumentCompleted_downNow;
            webBrowser1.Url = categoryUri;
        }

        /// <summary>
        /// Handles completion of the category page: collects the movie links, writes them
        /// (plus the number of paging links) to "1.txt" and starts visiting the movie pages.
        /// </summary>
        private void webBrowser_DocumentCompleted_downNow(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            // DocumentCompleted is raised once per frame; only act when the whole
            // document has finished loading.
            if (webBrowser1.ReadyState != WebBrowserReadyState.Complete || webBrowser1.Document == null)
            {
                return;
            }

            // The category page is handled exactly once; movie pages loaded later
            // must not be parsed as a category again.
            webBrowser1.DocumentCompleted -= webBrowser_DocumentCompleted_downNow;

            label_log.Text = "开始解析……";
            HtmlElementCollection collection = webBrowser1.Document.Links;
            label_log.Text = "开始解析…………";

            List<string> movies = new List<string>();
            int pages = 0;
            foreach (HtmlElement elem in collection)
            {
                string href = elem.GetAttribute("href");
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                if (IsWantedMovieLink(href))
                {
                    movies.Add(href);
                }

                if (href.StartsWith("javascript:ChangePage", StringComparison.Ordinal))
                {
                    pages++;
                }
            }

            label_log.Text = "视频数:" + movies.Count + ",页数:" + pages;
            if (movies.Count == 0)
            {
                return;
            }

            // Same file layout as before: one link per line, then the page count.
            using (StreamWriter writer = new StreamWriter("1.txt"))
            {
                foreach (string movie in movies)
                {
                    writer.WriteLine(movie);
                }

                writer.Write("pages:" + pages);
            }

            // Movies are visited from the last link on the page to the first.
            for (int x = movies.Count - 1; x >= 0; x--)
            {
                pendingMovies.Enqueue(movies[x]);
            }

            webBrowser1.DocumentCompleted += webBrowser_DocumentCompleted_downMovice;
            NavigateToNextMovie();
        }

        /// <summary>
        /// Handles completion of a movie page: downloads the first ".mp4" link found on it
        /// (if any) and then moves on to the next queued movie page.
        /// </summary>
        private void webBrowser_DocumentCompleted_downMovice(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (currentMovie == null
                || webBrowser1.ReadyState != WebBrowserReadyState.Complete
                || webBrowser1.Document == null)
            {
                return;
            }

            // Claim the page before doing any work so a repeated or re-entrant
            // DocumentCompleted for the same page cannot trigger a second download.
            currentMovie = null;

            label_log.Text = "开始解析……";
            string title = webBrowser1.DocumentTitle;
            label_log.Text = "标题:" + title;

            string link = FindFirstMp4Link(webBrowser1.Document.Links);
            if (!string.IsNullOrEmpty(link))
            {
                DownloadMovie(link, title);
            }

            NavigateToNextMovie();
        }

        /// <summary>Returns true when the link points to a ".movie" page that is not on the exclusion list.</summary>
        private static bool IsWantedMovieLink(string href)
        {
            if (!href.EndsWith(".movie", StringComparison.Ordinal)
                || href.StartsWith("mailto:", StringComparison.Ordinal))
            {
                return false;
            }

            foreach (string fragment in ExcludedFragments)
            {
                if (href.Contains(fragment))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Returns the first link ending in ".mp4", or null when the page has none.</summary>
        private static string FindFirstMp4Link(HtmlElementCollection links)
        {
            foreach (HtmlElement elem in links)
            {
                string href = elem.GetAttribute("href");
                if (!string.IsNullOrEmpty(href) && href.EndsWith(".mp4", StringComparison.Ordinal))
                {
                    return href;
                }
            }

            return null;
        }

        /// <summary>
        /// Loads the next queued movie page, skipping entries that are not valid absolute
        /// addresses. Detaches the movie handler once the queue is exhausted.
        /// </summary>
        private void NavigateToNextMovie()
        {
            while (pendingMovies.Count > 0)
            {
                string next = pendingMovies.Dequeue();
                Uri movieUri;
                if (!Uri.TryCreate(next, UriKind.Absolute, out movieUri))
                {
                    continue;
                }

                currentMovie = next;
                label_log.Text = "正在下载:" + next;
                webBrowser1.Url = movieUri;
                return;
            }

            currentMovie = null;
            webBrowser1.DocumentCompleted -= webBrowser_DocumentCompleted_downMovice;
        }

        /// <summary>
        /// Creates the target folder "&lt;counter&gt; &lt;title&gt;" under the download root and passes
        /// the file to <c>HttpDownLoad.httpDownFile</c>.
        /// </summary>
        private void DownloadMovie(string link, string title)
        {
            // A title without the expected suffix is used as-is instead of throwing.
            int suffixAt = title.IndexOf(TitleSuffix, StringComparison.Ordinal);
            if (suffixAt >= 0)
            {
                title = title.Substring(0, suffixAt);
            }

            label_log.Text = "下载地址:" + link;

            string folder = Path.Combine(DownloadRoot, downTimes + " " + SanitizeFileName(title));
            Directory.CreateDirectory(folder); // no-op when the folder already exists

            string fileName = SanitizeFileName(link.Substring(link.LastIndexOf('/') + 1));
            string path = Path.Combine(folder, fileName);
            label_log.Text = "下载到:" + path;

            // NOTE(review): the next page is loaded as soon as this call returns, so the
            // queue is strictly sequential only if httpDownFile blocks until the download
            // has finished - confirm against HttpDownLoad.
            HttpDownLoad downLoad = new HttpDownLoad();
            downLoad.httpDownFile(link, path, progressBar1, label_progress);
            downTimes++;
        }

        /// <summary>Replaces characters that are not allowed in file names with '_'.</summary>
        private static string SanitizeFileName(string name)
        {
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return name;
        }
    }
}
循环下载的时候还是有问题 ~~~ 真麻烦