zoukankan      html  css  js  c++  java
  • 用C#.net开发的当当网信息查询工具

    学生时代的小玩具

    用C#.net开发的一个用来抓取当当网计算机类图书信息的工具


    Program.cs

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Windows.Forms;
    namespace spider
    {
        /// <summary>
        /// Hosts the process entry point of the spider application.
        /// </summary>
        internal static class Program
        {
            /// <summary>
            /// The main entry point of the application: configures WinForms
            /// rendering and runs the message loop on <see cref="Form1"/>.
            /// </summary>
            [STAThread]
            private static void Main()
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);

                Form1 mainForm = new Form1();
                Application.Run(mainForm);
            }
        }
    }

    Form1.cs

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.IO;
    namespace spider
    {
        /// <summary>
        /// Main window of the spider. It loads one page of the Dangdang book
        /// listing into the embedded browser, parses the loaded document into
        /// a table shown in the grid, and can append the parsed books to a
        /// text file.
        /// </summary>
        public partial class Form1 : Form
        {
            // Lowest page index the UI will navigate to.
            private const int FirstPage = 1;

            // Listing URL without the page number; the current page index is appended on navigation.
            private string url = @"http://category.dangdang.com/all/?category_path=01.54.26.00.00.00&page_index=";
            private static int page = FirstPage;

            // Result of the most recent parse; stays null until a page has been parsed.
            private Parse p;

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>Restarts browsing from the first page.</summary>
            private void buttonstart_Click(object sender, EventArgs e)
            {
                page = FirstPage;
                Execute();
            }

            /// <summary>Moves one page back; does nothing when already on the first page.</summary>
            private void buttonprev_Click(object sender, EventArgs e)
            {
                if (page <= FirstPage)
                {
                    // There is no page before the first one, so do not build an invalid URL.
                    return;
                }
                page--;
                Execute();
            }

            /// <summary>Moves one page forward.</summary>
            private void buttonnext_Click(object sender, EventArgs e)
            {
                page++;
                Execute();
            }

            /// <summary>
            /// Jumps to the page number typed into <c>textBox2</c>. Input that is not
            /// an integer, or is below the first page, is rejected with a message
            /// instead of throwing.
            /// </summary>
            private void buttonjump_Click(object sender, EventArgs e)
            {
                int target;
                if (!int.TryParse(textBox2.Text, out target) || target < FirstPage)
                {
                    MessageBox.Show("请输入有效的页码");
                    return;
                }
                page = target;
                Execute();
            }

            /// <summary>
            /// Navigates the browser to the current page and mirrors the address in
            /// <c>textBox1</c>. Parsing happens later, in the DocumentCompleted handler.
            /// </summary>
            private void Execute()
            {
                string address = url + page.ToString();
                webBrowser1.Navigate(address);
                textBox1.Text = address;
                Cursor.Current = Cursors.WaitCursor;
            }

            /// <summary>
            /// Parses the loaded document and binds the resulting table to the grid.
            /// </summary>
            private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                // The underlying browser raises this event once per frame; only the
                // top-level document (whose URL equals the control's URL) is parsed,
                // so the grid is filled and the message is shown once per navigation.
                if (e.Url != webBrowser1.Url)
                {
                    return;
                }

                HtmlDocument doc = webBrowser1.Document;
                if (doc == null)
                {
                    Cursor.Current = Cursors.Default;
                    return;
                }

                p = new Parse(doc);
                dataGridView1.DataSource = p.dt;

                // Preferred widths, by column position; bounded by the actual column
                // count so a grid with fewer columns does not throw.
                int[] widths = { 150, 150, 150, 80, 450 };
                for (int i = 0; i < widths.Length && i < dataGridView1.Columns.Count; i++)
                {
                    dataGridView1.Columns[i].Width = widths[i];
                }

                Cursor.Current = Cursors.Default;
                MessageBox.Show("解析完成");
            }

            /// <summary>
            /// Appends the text form of every parsed book to a file chosen by the user.
            /// </summary>
            private void buttonsave_Click(object sender, EventArgs e)
            {
                if (p == null)
                {
                    // Nothing has been parsed yet, so there is nothing to save.
                    MessageBox.Show("没有可保存的数据");
                    return;
                }

                using (SaveFileDialog sfd = new SaveFileDialog())
                {
                    sfd.DefaultExt = "txt";
                    if (sfd.ShowDialog() != DialogResult.OK)
                    {
                        return;
                    }

                    string path = sfd.FileName;
                    StringBuilder sb = new StringBuilder();
                    foreach (Book book in p.list)
                    {
                        sb.Append(book.ToString());
                    }

                    try
                    {
                        // Appends rather than overwrites, as the original code did.
                        File.AppendAllText(path, sb.ToString(), Encoding.Default);
                    }
                    catch (IOException ex)
                    {
                        MessageBox.Show("保存失败\n" + ex.Message);
                        return;
                    }
                    catch (UnauthorizedAccessException ex)
                    {
                        MessageBox.Show("保存失败\n" + ex.Message);
                        return;
                    }

                    MessageBox.Show("保存成功\n" + path);
                }
            }
        }
    }


    Book.cs

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    namespace spider
    {
        /// <summary>
        /// Plain data holder for one book entry: title, author(s), publisher,
        /// publication time and description.
        /// </summary>
        class Book
        {
            public string name { get; set; }
            public string author { get; set; }
            public string pub { get; set; }
            public string time { get; set; }
            public string describ { get; set; }

            /// <summary>Creates a book with every field left unset.</summary>
            public Book()
            {
            }

            /// <summary>Creates a book with all five fields supplied.</summary>
            public Book(string name, string author, string pub, string time, string describ)
            {
                this.name = name;
                this.author = author;
                this.pub = pub;
                this.time = time;
                this.describ = describ;
            }

            /// <summary>
            /// Renders the book as five labelled lines separated by CRLF and
            /// followed by one blank line. Unset fields render as empty text.
            /// </summary>
            public override string ToString()
            {
                const string lineBreak = "\r\n";

                StringBuilder text = new StringBuilder();
                text.Append("书名:").Append(name).Append(lineBreak);
                text.Append("作者:").Append(author).Append(lineBreak);
                text.Append("出版商:").Append(pub).Append(lineBreak);
                text.Append("出版时间:").Append(time).Append(lineBreak);
                text.Append("描述:").Append(describ).Append(lineBreak);
                text.Append(lineBreak);
                return text.ToString();
            }
        }
    }

    Parse.cs

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Data;
    using System.Text.RegularExpressions;
    namespace spider
    {
        /// <summary>
        /// Extracts book entries from a loaded HTML document. Each
        /// <c>div</c> whose class is "listitem detail" becomes one
        /// <see cref="Book"/>, stored both in <see cref="list"/> and as a row
        /// of <see cref="dt"/>.
        /// </summary>
        class Parse
        {
            // Matches a yyyy-MM-dd date; built once instead of once per list item.
            private static readonly Regex PublishDatePattern = new Regex(@"(\d{4})\-(\d{2})\-(\d{2})");

            private HtmlDocument dom;

            /// <summary>Parsed books as a table with one text column per book field.</summary>
            public DataTable dt { get; set; }

            /// <summary>Parsed books in document order.</summary>
            public List<Book> list { get; set; }

            /// <summary>
            /// Creates the result containers and immediately parses <paramref name="dom"/>.
            /// </summary>
            /// <param name="dom">The loaded document to scan.</param>
            /// <exception cref="ArgumentNullException"><paramref name="dom"/> is null.</exception>
            public Parse(HtmlDocument dom)
            {
                if (dom == null)
                {
                    throw new ArgumentNullException("dom");
                }

                this.dom = dom;
                dt = new DataTable();
                list = new List<Book>();
                dt.Columns.Add("书名");
                dt.Columns.Add("作者");
                dt.Columns.Add("出版社");
                dt.Columns.Add("出版时间");
                dt.Columns.Add("描述");
                Execute();
            }

            /// <summary>
            /// Scans the document and refills <see cref="dt"/> and <see cref="list"/>.
            /// Earlier results are cleared first, so calling this again does not
            /// duplicate entries.
            /// </summary>
            public void Execute()
            {
                dt.Rows.Clear();
                list.Clear();

                foreach (HtmlElement container in dom.GetElementsByTagName("div"))
                {
                    // Only divs carrying the book-entry class hold book information.
                    if (container.GetAttribute("classname") != "listitem detail")
                    {
                        continue;
                    }

                    Book book = ReadBook(container);

                    DataRow row = dt.NewRow();
                    row["书名"] = book.name;
                    row["作者"] = book.author;
                    row["出版社"] = book.pub;
                    row["出版时间"] = book.time;
                    row["描述"] = book.describ;
                    dt.Rows.Add(row);
                    list.Add(book);
                }
            }

            // Builds one Book from the <li> children of a book-entry div.
            private static Book ReadBook(HtmlElement container)
            {
                Book book = new Book();
                foreach (HtmlElement item in container.GetElementsByTagName("li"))
                {
                    switch (item.GetAttribute("classname"))
                    {
                        case "maintitle": // title
                            book.name = item.OuterText;
                            break;
                        case "publisher_info": // authors, publisher, publication date
                            ReadPublisherInfo(item, book);
                            break;
                        case "describ": // description
                            book.describ = item.OuterText;
                            break;
                    }
                }
                return book;
            }

            // Fills author, publisher and publication date from a "publisher_info" item.
            private static void ReadPublisherInfo(HtmlElement item, Book book)
            {
                List<string> authors = new List<string>();
                foreach (HtmlElement link in item.GetElementsByTagName("a"))
                {
                    string linkName = link.GetAttribute("name");
                    if (linkName == "Author")
                    {
                        // Blank anchors are skipped so the joined list has no stray commas.
                        if (!string.IsNullOrEmpty(link.OuterText))
                        {
                            authors.Add(link.OuterText);
                        }
                    }
                    else if (linkName == "Pub")
                    {
                        book.pub = link.OuterText;
                    }
                }
                book.author = string.Join(",", authors.ToArray());

                // OuterText may be null; Regex.Match rejects a null input.
                Match date = PublishDatePattern.Match(item.OuterText ?? string.Empty);
                if (date.Success)
                {
                    book.time = date.Value;
                }
            }
        }
    }



  • 相关阅读:
    ZOJ 3795 Grouping
    ZOJ 3791 An Easy Game
    ZOJ 3790 Consecutive Blocks
    POJ 1451 T9
    POJ 1141 Brackets Sequence
    POJ 2411 Mondriaan's Dream
    POJ 2513 Colored Sticks
    Eclipse 快捷键大全
    C# lock关键字(多线程)
    C# 内部类
  • 原文地址:https://www.cnblogs.com/hanfeihan1992/p/4504078.html
Copyright © 2011-2022 走看看