zoukankan      html  css  js  c++  java
  • 使用 WebBrowser 获取Ajax动态加载网页信息

    直接上代码(代码较粗糙,可根据需要优化):

    在非 STA 线程中直接创建 WebBrowser 会抛出 ThreadStateException(当前线程不在单线程单元中,无法实例化 ActiveX 控件)。我的解决方法是:在 Main 方法上添加 [STAThread] 特性,将线程模型指定为单线程单元(STA):

    [STAThread]
    static void Main(string[] args)

    using System;
    using System.Collections.Generic;
    using System.Collections.Specialized;
    using System.IO;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Windows.Forms;
    
    namespace CrawlerTest
    {
        /// <summary>
        /// Helpers for downloading pages whose content is produced by client-side
        /// script (Ajax) and for extracting data from the rendered HTML.
        /// </summary>
        public class HttpHelper
        {
            /// <summary>
            /// Minimum time, in seconds, to keep pumping messages after navigation starts,
            /// even if the control already reports <see cref="WebBrowserReadyState.Complete"/>.
            /// Gives scripts on the page a chance to finish loading dynamic content.
            /// </summary>
            private const double MinRenderWaitSeconds = 3;

            /// <summary>
            /// Upper bound, in seconds, on the total wait. Without it, a page that never
            /// reaches the Complete state would block the caller forever.
            /// </summary>
            private const double MaxLoadWaitSeconds = 60;

            /// <summary>
            /// Loads <paramref name="url"/> in an off-screen <see cref="WebBrowser"/> control,
            /// waits for the document to finish loading, and returns the URL-decoded
            /// outer HTML of the document body.
            /// </summary>
            /// <remarks>
            /// The WebBrowser control wraps an ActiveX component, so the calling thread must
            /// be a single-threaded apartment (for example, mark <c>Main</c> with <c>[STAThread]</c>).
            /// The method pumps the message loop via <see cref="Application.DoEvents"/> while waiting.
            /// </remarks>
            /// <param name="url">Address of the page to load.</param>
            /// <returns>
            /// The decoded body HTML, or <c>null</c> if the page did not finish loading within
            /// the time limit, had no body, or an exception occurred (the exception is written
            /// to the console rather than rethrown).
            /// </returns>
            public static string DownloadAjaxHtml(string url)
            {
                string htmlstr = null;
                try
                {
                    // Dispose the control so the underlying ActiveX browser instance is released.
                    using (WebBrowser wb = new WebBrowser())
                    {
                        wb.AllowNavigation = true;
                        wb.ScriptErrorsSuppressed = true;

                        wb.Navigate(url);

                        // Stopwatch is monotonic; wall-clock time can jump if the system clock changes.
                        var clock = System.Diagnostics.Stopwatch.StartNew();
                        while (clock.Elapsed.TotalSeconds <= MinRenderWaitSeconds
                               || wb.ReadyState != WebBrowserReadyState.Complete)
                        {
                            if (clock.Elapsed.TotalSeconds > MaxLoadWaitSeconds)
                            {
                                Console.WriteLine($"DownloadAjaxHtml-Error:timed out after {MaxLoadWaitSeconds}s loading {url}");
                                break;
                            }

                            // Let the browser control process its pending window messages,
                            // then yield briefly instead of spinning at full CPU.
                            Application.DoEvents();
                            System.Threading.Thread.Sleep(10);
                        }

                        if (wb.ReadyState == WebBrowserReadyState.Complete)
                        {
                            // Document or Body can be null (e.g. navigation was cancelled);
                            // UrlDecode returns null for null input.
                            // NOTE(review): UrlDecode also turns '+' into spaces and expands %XX
                            // sequences inside the markup; kept because existing callers may
                            // rely on the decoded form - confirm this is intended.
                            htmlstr = System.Web.HttpUtility.UrlDecode(wb.Document?.Body?.OuterHtml);
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine($"DownloadAjaxHtml-Error:{ex.ToString()}");
                }

                return htmlstr;
            }

            /// <summary>
            /// Downloads the news page with <see cref="DownloadAjaxHtml"/> and extracts one
            /// <c>NewsHotTitle</c> per <c>li</c> element found under <c>ul class="htList"</c>.
            /// </summary>
            /// <param name="encoding">
            /// Not used by the current implementation; retained so existing callers keep compiling.
            /// </param>
            /// <returns>
            /// The extracted titles. The list is empty (never <c>null</c>) when the page could
            /// not be downloaded or contains no matching list items.
            /// </returns>
            public static List<NewsHotTitle> GetHotTitle(Encoding encoding)
            {
                var url = "http://www.news.cn/2021homepro/rsznb/";
                var list = new List<NewsHotTitle>();

                string strHtml = HttpHelper.DownloadAjaxHtml(url);
                if (string.IsNullOrEmpty(strHtml))
                {
                    // Nothing to parse: report and stop instead of passing null to the parser.
                    Console.WriteLine($"获取数据失败");
                    return list;
                }

                // Fully qualified: System.Windows.Forms also declares an HtmlDocument type,
                // and that one would be picked up by the file's using directives.
                var doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);

                HtmlAgilityPack.HtmlNode rootnode = doc.DocumentNode;
                // SelectNodes yields null (not an empty collection) when nothing matches.
                HtmlAgilityPack.HtmlNodeCollection hotlist = rootnode.SelectNodes("//ul[@class='htList']//li");
                if (hotlist == null || hotlist.Count == 0)
                {
                    Console.WriteLine($"获取数据失败");
                    return list;
                }

                foreach (HtmlAgilityPack.HtmlNode item in hotlist)
                {
                    NewsHotTitle model = new NewsHotTitle();
                    // RemoveHtml is defined outside this listing; presumably it strips markup
                    // from the item's inner HTML - verify against its implementation.
                    model.Title = HttpHelper.RemoveHtml(item.InnerHtml);
                    model.PublishTime = DateTime.Now;

                    Console.WriteLine($"{model.ToJson()}");

                    // Collect the item; previously the model was built but never added,
                    // so the method always returned an empty list.
                    list.Add(model);
                }

                return list;
            }
        }
    }
    

      

  • 相关阅读:
    删除功能ThinkPHP
    详解又详解KMP中的next和nextval的算法
    Thinphp ajax搜索框实施搜索提示
    tp3无法select一条数据记录
    一步步学习springcloud之总览(一)
    使用github搭建自己的maven仓库
    Win7 安装7zip后无7zip右键菜单的解决办法
    基于微信的邮箱新邮件推送
    Apache James 使用MySQL存储启动报错Specified key was too long; max key length is 3072 bytes
    错误提示:Error running MainActivity: Instant Run requires 'Tools | Android | Enable ADB integration' to be enabled.
  • 原文地址:https://www.cnblogs.com/mlinber/p/15683825.html
Copyright © 2011-2022 走看看