using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using HtmlAgilityPack;

namespace ConsoleApp
{
    /// <summary>
    /// Console demo that loads <c>index.html</c> from the application base directory,
    /// extracts a handful of fixed cells and one table with HtmlAgilityPack XPath queries,
    /// and reports how long the parsing took.
    /// </summary>
    class Program
    {
        // Alternation of substrings stripped from every cell's text before trimming.
        // NOTE(review): the four alternatives look identical here; they may originally have been
        // distinct whitespace characters or entities that were flattened in transit. Verify.
        const string NoisePattern = " | | | ";

        // Shared XPath prefix of every query below.
        const string ReportRoot = "/html/body/div/div/table/tr[2]/td";
        const string HeaderRow = ReportRoot + "/table[1]/tbody/tr[2]";
        const string IdentityRow = ReportRoot + "/table[2]/tbody/tr[1]";
        const string DetailRows = ReportRoot + "/table[4]/tr[3]/td/table/tbody/tr/td/table/tbody/tr";

        /// <summary>
        /// Returns the cleaned inner text of a node.
        /// </summary>
        /// <param name="_htmlnode">Node to read; may be null (e.g. an XPath query that matched nothing).</param>
        /// <param name="isSplit">
        /// When true and the text contains a colon, only the part after the first colon is returned
        /// (the "label:value" form); otherwise the whole cleaned text is returned.
        /// </param>
        /// <returns>The cleaned text, or an empty string when the node is null.</returns>
        static string goText(HtmlNode _htmlnode, bool isSplit = true)
        {
            // An explicit null guard replaces the former catch-all, which hid every kind of failure.
            if (_htmlnode == null)
            {
                return "";
            }

            string raw = _htmlnode.InnerText ?? "";
            string str = Regex.Replace(raw, NoisePattern, "").Trim();

            if (isSplit)
            {
                // Cut at the FIRST colon only, so values that themselves contain colons
                // (times such as 12:30:00) are not truncated.
                int colon = str.IndexOf(':');
                if (colon >= 0)
                {
                    str = str.Substring(colon + 1);
                }
            }

            return str;
        }

        /// <summary>
        /// Converts table rows into <see cref="Table"/> records, mapping the first four
        /// <c>td</c> cells of each row to <c>a</c>..<c>d</c>. Extra cells are ignored.
        /// </summary>
        /// <param name="rows">Rows returned by <c>SelectNodes</c>; null when nothing matched.</param>
        /// <returns>One record per element row; empty when <paramref name="rows"/> is null.</returns>
        static List<Table> ReadRows(HtmlNodeCollection rows)
        {
            List<Table> result = new List<Table>();
            if (rows == null)
            {
                return result;
            }

            foreach (HtmlNode row in rows)
            {
                if (row.NodeType != HtmlNodeType.Element)
                {
                    continue;
                }

                Table record = new Table();
                int column = 0;
                foreach (HtmlNode cell in row.Elements("td"))
                {
                    string text = goText(cell, false);
                    switch (column)
                    {
                        case 0: record.a = text; break;
                        case 1: record.b = text; break;
                        case 2: record.c = text; break;
                        case 3: record.d = text; break;
                    }
                    column++;
                }
                result.Add(record);
            }

            return result;
        }

        /// <summary>
        /// Entry point: reads the page, runs the extraction, prints the table rows and the elapsed time,
        /// then waits for Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Read index.html next to the executable; a missing file is parsed as an empty document.
            string basePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "index.html");
            string html = "";
            if (File.Exists(basePath))
            {
                html = File.ReadAllText(basePath, Encoding.Default);
            }

            // Start timing (file reading is deliberately excluded, as before).
            Stopwatch timer = Stopwatch.StartNew();

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(html);
            HtmlNode root = htmlDoc.DocumentNode;

            // The single-cell values below are extracted as part of the timed work;
            // nothing in this file reads them afterwards.
            // Report number
            var report_number = goText(root.SelectSingleNode(HeaderRow + "/td[1]"));
            // Query time
            var query_time = goText(root.SelectSingleNode(HeaderRow + "/td[2]"));
            // Report time
            var report_time = goText(root.SelectSingleNode(HeaderRow + "/td[3]"));
            // Name
            var report_name = goText(root.SelectSingleNode(IdentityRow + "/td[1]"));
            // ID document type
            var report_type = goText(root.SelectSingleNode(IdentityRow + "/td[2]"));
            // ID document number
            var report_id = goText(root.SelectSingleNode(IdentityRow + "/td[3]"));
            // Marital status
            var report_marriage = goText(root.SelectSingleNode(IdentityRow + "/td[4]"));

            // Detail table: SelectNodes yields null when nothing matches, which ReadRows tolerates.
            List<Table> list = ReadRows(root.SelectNodes(DetailRows));

            // Drop the header row, if there is one.
            if (list.Count > 0)
            {
                list.RemoveAt(0);
            }

            // Print the actual row contents (writing the list object only printed its type name).
            foreach (Table row in list)
            {
                Console.WriteLine(string.Join("\t", new[] { row.a, row.b, row.c, row.d }));
            }

            // Report elapsed time.
            timer.Stop();
            Console.WriteLine("DateTime总共花费{0}ms.", timer.Elapsed.TotalMilliseconds);
            Console.ReadLine();
        }
    }

    /// <summary>
    /// One row of the extracted detail table; each property holds the text of one column.
    /// </summary>
    public class Table
    {
        /// <summary>
        /// a (text of the first cell)
        /// </summary>
        public string a { get; set; }

        /// <summary>
        /// b (text of the second cell)
        /// </summary>
        public string b { get; set; }

        /// <summary>
        /// c (text of the third cell)
        /// </summary>
        public string c { get; set; }

        /// <summary>
        /// d (text of the fourth cell)
        /// </summary>
        public string d { get; set; }
    }
}