zoukankan      html  css  js  c++  java
  • 使用HtmlAgilityPack解析Html(非常好用)

     /// <summary>
        /// Form that hosts a WebBrowser control, navigates it to a query page and
        /// scrapes the resulting table with HtmlAgilityPack.
        /// Designed as a separate exe to work around the WebBrowser control's memory leak.
        /// </summary>
        public partial class MainForm : Form
        {
            /// <summary>
            /// Whether the current query has finished.
            /// </summary>
            /// <remarks>
            /// The WebBrowser control can only run on the UI thread, so <see cref="Query"/>
            /// polls this flag instead of waiting on a signal.
            /// </remarks>
            private bool isCompleted;

            /// <summary>
            /// Rows collected by the DocumentCompleted handler for the current query.
            /// </summary>
            private readonly List<RowData> executeResult = new List<RowData>();

            private static readonly MainForm instance = new MainForm();

            /// <summary>
            /// The singleton instance.
            /// </summary>
            public static MainForm Instance { get { return instance; } }

            private MainForm()
            {
                InitializeComponent();
                webBrowser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(webBrowser_DocumentCompleted);
            }

            /// <summary>
            /// Handles the browser's DocumentCompleted event: when the loaded page is the
            /// expected result page, extracts its rows and marks the query as completed.
            /// </summary>
            private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                // Ignore events raised before the control reports the document as fully loaded.
                if (this.webBrowser.ReadyState != WebBrowserReadyState.Complete)
                {
                    return;
                }

                // Guard against a missing document or body instead of crashing inside the event handler.
                var document = webBrowser.Document;
                if (document == null || document.Body == null)
                {
                    return;
                }

                // Only the page with this exact title carries the result table.
                if (document.Title == "选择")
                {
                    executeResult.AddRange(ExtractData(document.Body.InnerHtml));
                    isCompleted = true;
                }
            }

            /// <summary>
            /// Parses the given HTML and converts every complete table row into a <see cref="RowData"/>.
            /// </summary>
            /// <param name="html">HTML fragment to parse; may be null or empty.</param>
            /// <returns>
            /// The extracted rows. Empty when the HTML is null or empty, when the element with
            /// id "div" is missing, or when it contains no "tbody/tr" rows. Rows with fewer than
            /// ten "td/div" cells are skipped.
            /// </returns>
            private List<RowData> ExtractData(string html)
            {
                // Number of td/div cells read from each row below (indexes 0 through 9).
                const int expectedCellCount = 10;

                List<RowData> rows = new List<RowData>();
                if (string.IsNullOrEmpty(html))
                {
                    return rows;
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                HtmlAgilityPack.HtmlNode container = doc.GetElementbyId("div");
                if (container == null)
                {
                    return rows;
                }

                // SelectNodes yields null rather than an empty collection when nothing matches.
                var trNodes = container.SelectNodes("tbody/tr");
                if (trNodes == null)
                {
                    return rows;
                }

                foreach (var trNode in trNodes)
                {
                    var cells = trNode.SelectNodes("td/div");
                    if (cells == null || cells.Count < expectedCellCount)
                    {
                        // Not a full data row (e.g. a header or spacer row); skip it.
                        continue;
                    }

                    rows.Add(new RowData
                    {
                        航班 = cells[0].InnerText,
                        出发时间 = cells[1].InnerText,
                        到达时间 = cells[2].InnerText,
                        机场 = cells[3].InnerText,
                        机型 = cells[4].InnerText,
                        头等 = cells[5].InnerText,
                        公务 = cells[6].InnerText,
                        全价 = cells[7].InnerText,
                        折扣 = cells[8].InnerText,
                        特价 = cells[9].InnerText
                    });
                }

                return rows;
            }

            /// <summary>
            /// Runs a query and waits until the result page has been processed or the timeout elapses.
            /// </summary>
            /// <param name="fromCity">Departure city code.</param>
            /// <param name="toCity">Arrival city code.</param>
            /// <param name="date">Departure date.</param>
            /// <param name="timeout">Maximum time to wait for the result page.</param>
            /// <returns>
            /// A new list holding the rows collected so far; empty when nothing was
            /// extracted before the timeout. The list is a snapshot, so a later call to
            /// this method does not modify it.
            /// </returns>
            /// <remarks>
            /// The method is marked Synchronized, so calls on this instance from different
            /// threads are serialized.
            /// </remarks>
            [MethodImpl(MethodImplOptions.Synchronized)]
            public List<RowData> Query(string fromCity, string toCity, DateTime date, TimeSpan timeout)
            {
                isCompleted = false;
                executeResult.Clear();

                // NOTE: the template is a placeholder without format items, so the
                // arguments passed to string.Format are currently not part of the URL.
                string urlTemplate = "http://www.xxx.com";
                string url = string.Format(urlTemplate, fromCity, date.Month, date.Day, date.Year, toCity);
                Navigate(url);

                // A Stopwatch is unaffected by system clock adjustments, unlike DateTime.Now.
                System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();

                // Keep waiting while the page is not processed and the timeout has not expired.
                while (!isCompleted && elapsed.Elapsed < timeout)
                {
                    Thread.Sleep(100);
                    // Pump the message loop so the browser can raise DocumentCompleted.
                    Application.DoEvents();
                }

                // Return a copy: executeResult is cleared and refilled by the next query.
                return new List<RowData>(executeResult);
            }

            /// <summary>
            /// Navigates the browser to the given URL, marshalling the call to the
            /// form's thread when invoked from another thread.
            /// </summary>
            /// <param name="url">Address to load.</param>
            private void Navigate(string url)
            {
                if (InvokeRequired)
                {
                    BeginInvoke(new Action<string>(Navigate), url);
                    return;
                }

                webBrowser.Navigate(url);
            }
        }
    
        /// <summary>
        /// Represents one row of data on the scraped page.
        /// Every property holds the inner text of one cell of that row, as assigned
        /// by MainForm.ExtractData from the row's "td/div" nodes.
        /// The property names are in Chinese; rename them yourself if you prefer otherwise.
        /// </summary>
        public class RowData
        {
            /// <summary>Flight (text of cell index 0).</summary>
            public string 航班 { get; set; }
            /// <summary>Departure time (text of cell index 1).</summary>
            public string 出发时间 { get; set; }
            /// <summary>Arrival time (text of cell index 2).</summary>
            public string 到达时间 { get; set; }
            /// <summary>Airport (text of cell index 3).</summary>
            public string 机场 { get; set; }
            /// <summary>Aircraft type (text of cell index 4).</summary>
            public string 机型 { get; set; }
            /// <summary>First class (text of cell index 5); presumably the fare or availability shown for that cabin — verify against the page.</summary>
            public string 头等 { get; set; }
            /// <summary>Business class (text of cell index 6); presumably the fare or availability shown for that cabin — verify against the page.</summary>
            public string 公务 { get; set; }
            /// <summary>Full price (text of cell index 7).</summary>
            public string 全价 { get; set; }
            /// <summary>Discount (text of cell index 8).</summary>
            public string 折扣 { get; set; }
            /// <summary>Special offer (text of cell index 9).</summary>
            public string 特价 { get; set; }
        }
  • 相关阅读:
    Spring Could not find unique TaskExecutor bean 错误
    Postman 测试 API 如何上传文件
    Spring Boot 项目上传日志到 Azure Application Insights
    Spring Boot 和 Hibernate 的 H2 数据库配置来进行启动测试
    android TextView多行数据显示
    MarkDown 查看器 typora
    Ubuntu16.04多个版本python编译器的安装和切换
    关于LPC824Lite开发板下载程序时提示"Invalid ROM Table"
    8寸防震三防平板电脑Windows/安卓
    HaaS100 OLED信息屏显示案例
  • 原文地址:https://www.cnblogs.com/xiaowy/p/3312673.html
Copyright © 2011-2022 走看看