解析HTML源码
1,正则获取:
// Pull every flight-segment line out of str_html_part2 with one regular expression.
// Named groups, in the order they must appear (optional whitespace between them):
//   FlightNo  - two letters followed by four digits
//   Cabin     - one or two letters/digits
//   FlghtDate - two letters, two digits, three letters (e.g. the shape of "TU18JUL")
//   FromTo    - six letters
//   Statu     - two letters and one digit
//   FromDt    - four digits
//   ToDt      - four digits
// The pattern is a verbatim string: "\s" is not a valid escape sequence in a regular
// C# string literal and would not compile.
string str_pattern = @"(?<FlightNo>[A-Z]{2}[0-9]{4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4})\s*(?<ToDt>[0-9]{4})";
Regex regex = new Regex(str_pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);

// Run the expression once and reuse the result instead of IsMatch followed by Matches.
MatchCollection matchCollection = regex.Matches(str_html_part2);
if (matchCollection.Count > 0)
{
    string FlightNo = "";
    string Cabin = "";
    string FlghtDate = "";
    string FromTo = "";
    string Statu = "";
    string FromDt = "";
    string ToDt = "";
    foreach (Match match in matchCollection)
    {
        FlightNo = match.Groups["FlightNo"].Value.Trim();
        Cabin = match.Groups["Cabin"].Value.Trim();
        FlghtDate = match.Groups["FlghtDate"].Value.Trim();
        FromTo = match.Groups["FromTo"].Value.Trim();
        Statu = match.Groups["Statu"].Value.Trim();
        FromDt = match.Groups["FromDt"].Value.Trim();
        ToDt = match.Groups["ToDt"].Value.Trim();
        // Consume the values here, e.g. with a method shaped like:
        //public bool IsExistFlight(string allcout,string cabin,string FromDt,string ToDt, string sp_code, string fromcity, string tocity, string fromdate)
    }
}
// Condition is true when the captured "Result" group contains anything other than the
// digits 0-9. An empty value satisfies "^[0-9]*$" (zero digits), so it is NOT flagged here.
// NOTE(review): the statement guarded by this condition is not part of the snippet.
1 if (!Regex.IsMatch(match.Groups["Result"].Value.ToString(), @"^[0-9]*$"))
// Generic template: visit every occurrence of strPattern inside htmlContent
// (case-insensitive; ^ and $ anchor at line boundaries because of Multiline).
var regex = new Regex(strPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
bool hasAnyMatch = regex.IsMatch(htmlContent);
if (hasAnyMatch)
{
    foreach (Match match in regex.Matches(htmlContent))
    {
        // Full text of the current match.
        string value = match.Value;
    }
}
测试解析PNR航班
RT 编码信息示例如下:
RTAAAAAA 1.MENG/HONG MS 2.ZHANG/DEPING MR 3.ZHANG/MUHAN MS AAAAAA 4. NH964 W TU18JUL PEKHND HK3 0825 1250 SEAME 3 I 5. NH963 V MO24JUL HNDPEK HK3 1720 2010 SEAME I 3 6.SZX/T SZX/T 0755-82819601/SHENZHEN TIANTAI AIR INTERNATIONAL TRAVEL AGENCY CO.,LTD ABCDEFG 7.18912790711 8.18912790711 9.TL/0625/18JUL/SZX000 10.SSR ADTK 1E TO NH BY 30JUN 1200 OTHERWISE WILL BE XLD 11.SSR DOCS NH HK1 P/CN/G42027462/CN/13OCT68/F/27APR20/MENG/HONG/P1 12.SSR DOCS NH HK1 P/CN/G40834536/CN/08SEP66/M/25MAY20/ZHANG/DEPING/P2 13.SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3 + PN 14.SSR CTCM NH HK1 18912790729/P3 - 15.OSI NH CTCT18912790729 16.RMK TJ AUTH SZV000/T 17.RMK 备注信息 18.RMK 1A/M42ROX 19.SZX000
解析方法
/// <summary>
/// Builds an <c>OrderView</c> from the text of an RT (PNR display) response: the PNR code,
/// the flight segments, the passenger names and, when "SSR DOCS" lines are present,
/// the document details they carry.
/// </summary>
/// <param name="pnrtxt">Raw RT text to parse.</param>
/// <param name="msg">Receives a description of the problem when parsing fails; not modified on success.</param>
/// <returns>
/// The populated order on success. If no PNR, no flight or no passenger can be matched,
/// the order built so far is returned and <paramref name="msg"/> is set. If a matched
/// flight or passenger cannot be converted (an exception is raised while reading it),
/// <c>null</c> is returned and <paramref name="msg"/> names the offending entry.
/// </returns>
public OrderView GetOrderViewByRtPNRTxt(string pnrtxt, ref string msg)
{
    ILogHandle handle = new ILogHandle(userid, "1E", "SELF", "解析RTPNR");
    DateTime _now = DateTime.Now;
    int restime = 0;

    OrderView result = new OrderView();
    string pnr = "";
    string strreq = pnrtxt;
    string strrsp = "";

    // The PNR is read as the six letters/digits that follow a passenger title.
    // All patterns below are verbatim strings: "\s" is not a valid escape sequence in a
    // regular C# string literal.
    Regex rex = new Regex(@"\s*(MR|MS|MRS|MISS)\s*(?<PNR>[a-zA-Z0-9]{6})", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Multiline);
    Match pnrMatch = rex.Match(pnrtxt);
    if (!pnrMatch.Success)
    {
        msg = "未解析到PNR";
        restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
        handle.Error(strreq, strrsp, "未解析到PNR", restime);
        return result;
    }
    pnr = pnrMatch.Groups["PNR"].Value.Trim();

    // ---- Flight segments -------------------------------------------------------------
    // seq is the 1-based number of the segment being read; the catch block reports it.
    int seq = 1;
    try
    {
        string str_pattern = @"(?<FlightNo>[0-9A-Z]{2}[0-9]{1,4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4}[+]?[1-9]?)\s*(?<ToDt>[0-9]{4}[+]?[1-9]?)";
        Regex regex = new Regex(str_pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);
        MatchCollection matchCollection = regex.Matches(pnrtxt);
        if (matchCollection.Count == 0)
        {
            msg = "未匹配到任何航班信息";
            restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
            handle.Error(strreq, strrsp, "未匹配到任何航班信息", restime);
            return result;
        }

        result.flights = new List<FlightModel>();
        // Date of the first segment; later segments are compared against it.
        DateTime dtfdate = DateTime.Now;

        foreach (Match match in matchCollection)
        {
            // FlghtDate has the shape "TU18JUL": chars 2-3 are the day, chars 4+ the month.
            string FlghtDate = match.Groups["FlghtDate"].Value.Trim();
            string FromTo = match.Groups["FromTo"].Value.Trim();
            FlightModel f = new FlightModel();

            // The text carries no year: assume the current year and roll over to the next
            // one only when the date is already past. The comparison uses today's date
            // (not the current time) so that a flight leaving later today is not moved
            // a year ahead.
            DateTime dttrgar = Convert.ToDateTime(DateTime.Now.Year.ToString() + "-" + MakePnrText.GetMonth(FlghtDate.Substring(4)) + "-" + FlghtDate.Substring(2, 2));
            if (dttrgar < DateTime.Today)
            {
                dttrgar = dttrgar.AddYears(1);
            }

            string fdate = dttrgar.ToString("yyyy-MM-dd");
            if (seq == 1)
            {
                dtfdate = Convert.ToDateTime(fdate);
            }

            f.flightno = match.Groups["FlightNo"].Value.Trim();
            f.seat = match.Groups["Cabin"].Value.Trim().Substring(0, 1);
            f.carrier = f.flightno.Substring(0, 2);
            // FromTo is six letters: the first three go to dept, the last three to arr.
            f.dept = FromTo.Substring(0, 3);
            f.depttime = GetDatetime(fdate, match.Groups["FromDt"].Value.Trim());
            f.arr = FromTo.Substring(3);
            f.arrtime = GetDatetime(fdate, match.Groups["ToDt"].Value.Trim());
            f.sailtype = seq;
            // triptype is 2 when the segment departs two or more days after the first
            // segment's date, otherwise 1.
            f.triptype = (f.depttime.Value - dtfdate).TotalDays >= 2 ? 2 : 1;
            f.optcarrier = f.carrier;
            f.optflightno = f.flightno;
            f.state = "Y";
            result.flights.Add(f);
            seq++;
        }
    }
    catch (Exception)
    {
        msg = "第" + seq + "段航班信息有误,请核实RT编码文本信息";
        restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
        handle.Error(strreq, strrsp, msg, restime);
        return null;
    }

    // ---- Passengers ------------------------------------------------------------------
    result.passes = new List<PassengerModel>();
    seq = 1;
    DateTime dtnow = DateTime.Now;
    try
    {
        // Titles are listed longest first: with "MR" ahead of "MRS" the engine stops at
        // "MR" for an "MRS" passenger, who would then be recorded as male.
        string str_pass = @"[.]?(?<name>[A-Z]{2,}/[A-Z]{1,}\s?[A-Z]{0,})\s*(?<sex>MRS|MISS|MR|MS)\s*(" + pnr + ")?";
        rex = new Regex(str_pass, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);
        MatchCollection matches = rex.Matches(pnrtxt);
        if (matches.Count == 0)
        {
            msg = "未匹配到任何乘客信息";
            restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
            handle.Error(strreq, strrsp, "未匹配到任何乘客信息", restime);
            return result;
        }

        // One generator for all placeholder values instead of a new one per call.
        Random rnd = new Random();
        foreach (Match match in matches)
        {
            string name = match.Groups["name"].Value.Trim();
            string sex = match.Groups["sex"].Value.Trim();
            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(sex))
            {
                msg = "第" + seq + "位乘客信息有误,请核实RT编码文本信息";
                restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
                handle.Error(strreq, strrsp, msg, restime);
                return null;
            }

            PassengerModel p = new PassengerModel();
            p.name = name;
            // The pattern is matched case-insensitively, so the title must be compared
            // case-insensitively as well.
            p.sex = string.Equals(sex, "MR", StringComparison.OrdinalIgnoreCase) ? "M" : "F";
            p.phone = "";
            // Placeholder document data; replaced below when a matching SSR DOCS line exists.
            p.nationality = "CN";
            p.birthday = dtnow.AddYears(rnd.Next(-20, -13));
            p.cardaddress = "CN";
            p.cardexpire = dtnow.AddYears(rnd.Next(2, 10));
            p.cardno = "P88888";
            p.cardtype = "PP";
            p.ptype = 1;
            result.passes.Add(p);
            if (seq == 1)
            {
                result.needpassinfo = "1";
            }
            seq++;
        }

        // Document lines, e.g.:
        //SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3
        string str_pinfo = @"(.SSR DOCS [0-9A-Z]{2} [0-9A-Z]{2}1 P/)(?<cardaddress>[A-Z]{2})/(?<cardno>[0-9A-Z]{2,})/(?<nationality>[A-Z]{2})/(?<birthday>[0-9A-Z]{7})/(?<sex>[A-Z]{1})/(?<cardexpire>[0-9A-Z]{7})/(?<name>[A-Z]{2,}(/)[A-Z]{1,}\s?[A-Z]{0,})(/H)?/P[0-9]{1,}\s*";
        rex = new Regex(str_pinfo, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);
        matches = rex.Matches(pnrtxt);

        // Two-digit current year, used to decide the century of a two-digit birth year.
        // (Taking only the last digit of the year misclassifies e.g. "10" in 2024.)
        int nowsyear = DateTime.Now.Year % 100;

        // The n-th DOCS line is paired with the n-th passenger and applied only when the
        // names agree.
        int docIndex = 0;
        foreach (Match match in matches)
        {
            // Keep seq 1-based so the error message in the catch block names the right entry.
            seq = docIndex + 1;

            // Both dates have the shape "07MAY99": day (0-1), month (2-4), year (5-6).
            string birthday = match.Groups["birthday"].Value.Trim();
            string cardexpire = match.Groups["cardexpire"].Value.Trim();
            PassengerModel pass = result.passes[docIndex];
            if (pass.name == match.Groups["name"].Value.Trim())
            {
                // Decided per passenger: a 20xx birth year must not leak into later entries.
                string yearpart = int.Parse(birthday.Substring(5)) < nowsyear ? "20" : "19";
                pass.cardaddress = match.Groups["cardaddress"].Value.Trim();
                pass.cardno = match.Groups["cardno"].Value.Trim();
                pass.nationality = match.Groups["nationality"].Value.Trim();
                pass.sex = match.Groups["sex"].Value.Trim();
                pass.birthday = Convert.ToDateTime(yearpart + birthday.Substring(5) + "-" + MakePnrText.GetMonth(birthday.Substring(2, 3)) + "-" + birthday.Substring(0, 2));
                pass.cardexpire = Convert.ToDateTime("20" + cardexpire.Substring(5) + "-" + MakePnrText.GetMonth(cardexpire.Substring(2, 3)) + "-" + cardexpire.Substring(0, 2));
            }
            docIndex++;
        }
    }
    catch (Exception)
    {
        msg = "第" + seq + "位乘客信息有误,请核实RT编码文本";
        restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
        handle.Error(strreq, strrsp, msg, restime);
        return null;
    }

    result.extemmsg = pnr;
    restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
    handle.Succes(strreq, strrsp, restime);
    return result;
}
2,HtmlAgilityPack 之 HtmlNode类 (主要是XPath语法解析,firebug插件可以查看对应XPath)
// Load an HTML string into an HtmlAgilityPack document and select rows with XPath.
string detailContext = "html 源码";

HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(detailContext);

// Start from the document root and select every <tr class="line" height="40"> found
// anywhere below a <table class="tab_result">.
// NOTE(review): check trlist for null before enumerating - confirm what SelectNodes
// returns when nothing matches in the HtmlAgilityPack version in use.
HtmlNode node = doc.DocumentNode;
string rowXPath = "//table[@class='tab_result']//tr[@class='line'][@height='40']";
HtmlNodeCollection trlist = node.SelectNodes(rowXPath);
解析 JSON 可以使用 Json.NET(Newtonsoft.Json),可在这里下载:http://www.newtonsoft.com/products/json/
// Turn the JSON text held in `context` into a list of entity objects with Json.NET.
List<Models.实体类> list =
    Newtonsoft.Json.JsonConvert.DeserializeObject<List<Models.实体类>>(context);
// Read the whole stream as JSON, then download every image listed under "imgs",
// reporting progress and errors to the UI controls through BeginInvoke.
using (StreamReader reader = new StreamReader(stream))
{
    string jsonData = reader.ReadToEnd();

    // The payload is read as an object whose "imgs" member is an array of objects, each
    // holding the image address under "objURL".
    // NOTE(review): neither the root object nor "imgs" is null-checked; a payload with a
    // different shape fails here with a NullReferenceException.
    JObject objectRoot = JsonConvert.DeserializeObject(jsonData) as JObject;
    JArray imgsArray = objectRoot["imgs"] as JArray;
    for (int i = 0; i < imgsArray.Count; i++)
    {
        JObject img = imgsArray[i] as JObject;
        string objUrl = (string)img["objURL"];
        // txtLogs.AppendText(objUrl + Environment.NewLine); // debug: show the image URL

        // Copy the loop counter. A `for` variable is shared by all iterations, and the
        // delegate below runs later on the UI thread - capturing `i` directly would make
        // it compute the progress from whatever value `i` has by then.
        int index = i;
        try
        {
            // Download this one image.
            DownloadImage(objUrl);

            // Update the progress bar.
            progressBar.BeginInvoke(new Action(() =>
            {
                progressBar.Value = index * 100 / sumCount;
            }));

            // Append to the log text box.
            txtLogs.BeginInvoke(new Action(() =>
            {
                txtLogs.AppendText("已下载:" + objUrl + Environment.NewLine);
            }));
        }
        catch (Exception ex)
        {
            // Report the failure in txtLogs; the control belongs to the UI thread, hence BeginInvoke.
            txtLogs.BeginInvoke(new Action(() =>
            {
                txtLogs.AppendText("【异常:" + ex.Message + "】" + Environment.NewLine);
            }));
        }
    }
}
// Read the piece count - the digits immediately before "件" - from a baggage-allowance text.
// The digit class must be [0-9]: the previous "[d]" matched the literal letter 'd',
// so a text such as "1件" never matched.
Regex _rexPC = new Regex(@"([0-9]{1,})件");
string str = "成都-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-万象,1件,每件23KG,长宽高100*60*40CM.万象-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-成都,1件,每件23KG,长宽高100*60*40CM";

// Match once and test Success instead of IsMatch followed by Match.
// Only the first occurrence is used.
var _mch = _rexPC.Match(str);
if (_mch.Success)
{
    // Group 1 holds the digits; 0 is the fallback argument passed to StrToInt.
    int adtpc = StringHelper.StrToInt(_mch.Groups[1].Value, 0);
}
解析获取到的 JSON 数据时,往往需要手动编写对应的实体类。之前一直手写,感觉太浪费时间,后来找到了一款工具,可以根据 JSON 自动生成实体类。