zoukankan      html  css  js  c++  java
  • Winista.Text.HtmlParser; 获取html

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Net;
    using System.IO;
    using System.Text.RegularExpressions;

    using Winista.Text.HtmlParser;
    using Winista.Text.HtmlParser.Lex;
    using Winista.Text.HtmlParser.Util;
    using Winista.Text.HtmlParser.Tags;
    using Winista.Text.HtmlParser.Filters;

    namespace Leo.GetOrganzation
    {
        public partial class FormLeo : Form
        {
            IList<string> siteList = new List<string>();

            public FormLeo()
            {
                InitializeComponent();
                this.InitSet();
            }

            private void InitSet()
            {
                this.comboBoxArea.SelectedIndex = 0;
            }

            private void comboBoxArea_SelectedIndexChanged(object sender, EventArgs e)
            {
                string strUrl = string.Empty;
                switch (this.comboBoxArea.SelectedIndex)
                {
                    case 0:
                        strUrl = string.Empty;
                        break;
                    case 1:
                        strUrl = "http://www.alexa.com/topsites/global;{0}";
                        break;
                    case 2:
                        strUrl = "http://www.alexa.com/topsites/countries;{0}/CN";
                        break;
                    case 3:
                        strUrl = "http://www.alexa.com/topsites/countries;{0}/HK";
                        break;
                    case 4:
                        strUrl = "http://www.alexa.com/topsites/countries;{0}/MO";
                        break;
                    case 5:
                        strUrl = "http://www.alexa.com/topsites/countries;{0}/TW";
                        break;
                }

                this.txtUrlText.Text = strUrl;

                this.textBox1.Text = "";
                this.textBox2.Text = "";
                this.textBox3.Text = "";

            }

            private void btnGetRes_Click(object sender, EventArgs e)
            {
                siteList = new List<string>();
                if (this.comboBoxArea.SelectedIndex == 0)
                {
                    MessageBox.Show("请选择区域");
                    return;
                }

                for (int i = 0; i < int.Parse(this.textBox3.Text.Trim()); i++)
                {

                    string urlLink = string.Format(this.txtUrlText.Text.Trim(),i);
                    string temp = this.GetRes(urlLink);

                    this.GetSiteName(temp);

                    System.Threading.Thread.Sleep(1000);
                    Application.DoEvents();
                }

                ////string temp = this.GetRes("http://www.alexa.com/topsites/countries;19/TW");

                ////this.GetSiteName(temp);

           
                MessageBox.Show("获取完毕,待导出");
            }

            private void GetSiteName(string strhtml)
            {
                Parser parser = Parser.CreateParser(strhtml, null);
                NodeList nodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("h2"));
              
                for (int i = 0; i < nodes.Count; i++)
                {
                    siteList.Add(nodes[i].Children[1].ToPlainTextString());
                }
               
            }

            private string GetRes(string Url)
            {

                string strResult = string.Empty;
                try
                {
                    HttpWebRequest request =(HttpWebRequest)WebRequest.Create(Url);
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    HttpWebResponse response =(HttpWebResponse)request.GetResponse();

                    Stream streamReceive = response.GetResponseStream();
                    Encoding encoding = Encoding.GetEncoding("utf-8");
                    StreamReader streamReader =new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                }
                catch(Exception) {}

                return strResult;
            }

            private void WriteTxt()
            {
                string sucessFile = Application.StartupPath;
                string selectText = this.comboBoxArea.SelectedItem.ToString();
                if (File.Exists(string.Format("{0}\{1}.txt", sucessFile, selectText)))
                {
                    File.Delete(string.Format("{0}\{1}.txt", sucessFile, selectText));
                }
                StreamWriter sw = new StreamWriter(string.Format("{0}\{1}.txt", sucessFile, selectText),true);
                try
                {
                    foreach (string item in siteList)
                    {
                        sw.WriteLine(item+" "+selectText);
                    }
                    sw.Dispose();
                    sw.Close();
                    MessageBox.Show("导出OK");
                   
                }
                catch (Exception)
                {
                }
            }

            private void button1_Click(object sender, EventArgs e)
            {
                if (this.comboBoxArea.SelectedIndex == 0)
                {
                    MessageBox.Show("请选择区域");
                    return;
                }
                if (siteList.Count == 0)
                {
                    MessageBox.Show("没有数据");
                    return;
                }
                WriteTxt();
            }

            private void textBox3_TextChanged(object sender, EventArgs e)
            {
               
                int countPage = int.Parse(this.textBox3.Text.Trim());
                this.textBox1.Text = string.Format(this.txtUrlText.Text.Trim(), 0);
                this.textBox2.Text = string.Format(this.txtUrlText.Text.Trim(), countPage - 1);
            }


        }
    }

  • 相关阅读:
    centos6和centos7升级openssh7.5脚本
    开通telnet服务,使用telnet登入
    彻底删除kafka的topic以及其中的数据
    redis集群创建
    curl 命令参数
    nginx.conf配置文件详解,白嫖的
    logstash迁移es数据
    es 常用查询
    pl/sql 存储过程
    es查看集群信息命令_cat和_cluster
  • 原文地址:https://www.cnblogs.com/binbinxiong/p/3374641.html
Copyright © 2011-2022 走看看