zoukankan      html  css  js  c++  java
  • 关于getHtml()方法和getHtmlAjax()方法 GetHttpLength, 清除HTML标签

    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> and returns its body as one line of text.
    /// </summary>
    /// <param name="Url">URL to request; passed as-is to <c>WebRequest.Create</c>.</param>
    /// <param name="type">Name of the encoding used to decode the response body (default "UTF-8").</param>
    /// <returns>
    /// The decoded response with carriage returns and line feeds removed, or an empty
    /// string if creating the request, fetching the response or decoding it throws.
    /// </returns>
    public string getHtml(string Url, string type = "UTF-8")
    {
        try
        {
            System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);

            // Dispose the response and its stream deterministically instead of
            // leaving them for the finalizer.
            using (System.Net.WebResponse wResp = wReq.GetResponse())
            using (System.IO.Stream respStream = wResp.GetResponseStream())
            using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, System.Text.Encoding.GetEncoding(type)))
            {
                // Flatten the document to a single line. The previous code called
                // Replace(" ", "") twice, which removed every space (corrupting markup
                // such as "<a href") and made the second call a no-op.
                // NOTE(review): assumed the intent was CR/LF stripping - confirm.
                return reader.ReadToEnd().Replace("\r", "").Replace("\n", "");
            }
        }
        catch (System.Exception ex)
        {
            // Best-effort contract is kept (callers still receive ""), but the
            // failure is now traced rather than silently discarded.
            System.Diagnostics.Trace.TraceWarning("getHtml failed for {0}: {1}", Url, ex.Message);
        }

        return "";
    }

    /// <summary>
    /// Removes HTML tags from a string.
    /// </summary>
    /// <param name="str">Text that may contain markup; may be null or empty.</param>
    /// <returns>
    /// <paramref name="str"/> with every <c>&lt;...&gt;</c> span deleted, or an empty
    /// string when the input is null or empty.
    /// </returns>
    public String ClearHtml(String str)
    {
        if (String.IsNullOrEmpty(str))
        {
            return String.Empty;
        }

        // A tag is "<", any run of characters other than ">", then ">".
        var tagPattern = new System.Text.RegularExpressions.Regex(
            @"<[^>]*>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        return tagPattern.Replace(str, String.Empty);
    }

    /// <summary>
    /// Loads <paramref name="Url"/> in a WebBrowser control, waits for the page to
    /// settle, and extracts one fragment of the rendered DOM.
    /// </summary>
    /// <param name="Url">Address to navigate to.</param>
    /// <param name="type">Not used by this method; kept so existing callers still compile.</param>
    /// <returns>
    /// The text from <c>&lt;TABLE class="ID_table stocks-info-table"</c> up to the first
    /// following <c>下一页&lt;/A&gt;</c>, searched inside the
    /// <c>&lt;DIV class="panelContentWrap"</c> fragment; an empty string when either
    /// pattern does not match or no document is available.
    /// </returns>
    public string getHtmlAjax(string Url, string type = "UTF-8")
    {
        wb = new WebBrowser();
        wb.Navigate(Url);

        // Pump the message loop until the control reports the document as complete.
        // NOTE(review): there is no timeout here; a navigation that never completes
        // will spin forever.
        while (wb.ReadyState != WebBrowserReadyState.Complete)
        {
            Application.DoEvents();
        }

        // Keep pumping messages for a further 5 seconds - presumably to let scripts
        // finish populating the page. A Stopwatch replaces the previous
        // System.Timers.Timer, which was never disposed and signalled through a
        // flag written from another thread.
        var settle = System.Diagnostics.Stopwatch.StartNew();
        while (settle.ElapsedMilliseconds < 1000 * 5)
        {
            Application.DoEvents();
        }

        if (wb.Document == null)
        {
            return "";
        }

        var htmldocument = (mshtml.HTMLDocument)wb.Document.DomDocument;
        if (htmldocument == null || htmldocument.documentElement == null)
        {
            return "";
        }

        // Collapse the markup to one line so ".*?" (which does not cross newlines
        // by default) can span it. The previous code removed every space instead,
        // so patterns containing spaces ("<DIV class=") could never match.
        // NOTE(review): assumed the intent was CR/LF stripping - confirm.
        string Content = htmldocument.documentElement.outerHTML.Replace("\r", "").Replace("\n", "");

        // Quotes inside the patterns must be escaped; unescaped they end the
        // string literal and the method does not compile.
        Regex reg = new Regex("<DIV class=\"panelContentWrap\".*?下一页</A>");
        Content = reg.Match(Content).Value;

        // Narrow the result to the table inside that fragment.
        reg = new Regex("<TABLE class=\"ID_table stocks-info-table\".*?下一页</A>");
        Content = reg.Match(Content).Value;

        return Content;
    }

    /// <summary>
    /// Issues an HTTP HEAD request and reports the content length of the response.
    /// </summary>
    /// <param name="url">Absolute URL to query; it is parsed with <c>new Uri(url)</c>.</param>
    /// <returns>
    /// The response's <c>ContentLength</c> when the status is 200 OK; 0 for any other
    /// status or when a <see cref="WebException"/> occurs (including the 5 second timeout).
    /// NOTE(review): ContentLength may be -1 when the server sends no Content-Length
    /// header - confirm whether callers expect that.
    /// </returns>
    public long GetHttpLength(string url)
    {
        try
        {
            var req = (HttpWebRequest)WebRequest.CreateDefault(new Uri(url));
            req.Method = "HEAD";   // headers only, no body download
            req.Timeout = 5000;    // milliseconds

            // The using block closes the response on every path, not only on success.
            using (var res = (HttpWebResponse)req.GetResponse())
            {
                return res.StatusCode == HttpStatusCode.OK ? res.ContentLength : 0L;
            }
        }
        catch (WebException)
        {
            return 0;
        }
    }

  • 相关阅读:
    python第三周练习
    python第一周作业
    SQLite3—数据库的学习—python
    python实现跳一跳辅助的实验报告
    Python——自己的第一个网页(文件的使用)
    第一次爬虫和测试
    numpy和matplotlib使用
    Python作业———预测球队比赛成绩
    PIL库的学习
    Python作业——Jieba库的使用和好玩的词云
  • 原文地址:https://www.cnblogs.com/it1042290135/p/5564367.html
Copyright © 2011-2022 走看看