zoukankan      html  css  js  c++  java
  • 解析网页源码方式

    解析HTML源码

    1,正则获取:

     // One flight segment: flight number (2 letters + 4 digits), cabin (1-2 chars),
     // flight date (2 letters + 2 digits + 3 letters), origin/destination pair (6 letters),
     // status (2 letters + 1 digit), then departure and arrival times (4 digits each).
     // Verbatim string (@"...") is required: "\s" is not a valid escape in a regular C# string literal.
     string str_pattern = @"(?<FlightNo>[A-Z]{2}[0-9]{4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4})\s*(?<ToDt>[0-9]{4})";
     Regex regex = new Regex(str_pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);

     // Matches() yields an empty collection when nothing matches, so a separate
     // IsMatch() pre-check (a second scan of the input) is unnecessary.
     foreach (Match match in regex.Matches(str_html_part2))
     {
         string FlightNo = match.Groups["FlightNo"].Value.Trim();
         string Cabin = match.Groups["Cabin"].Value.Trim();
         string FlghtDate = match.Groups["FlghtDate"].Value.Trim();
         string FromTo = match.Groups["FromTo"].Value.Trim();
         string Statu = match.Groups["Statu"].Value.Trim();
         string FromDt = match.Groups["FromDt"].Value.Trim();
         string ToDt = match.Groups["ToDt"].Value.Trim();

         // Consume the extracted fields here, e.g. with a helper shaped like:
         // bool IsExistFlight(string allcout, string cabin, string FromDt, string ToDt, string sp_code, string fromcity, string tocity, string fromdate)
     }
    1  if (!Regex.IsMatch(match.Groups["Result"].Value.ToString(), @"^[0-9]*$"))
    View Code


    // Generic template: run strPattern over htmlContent and visit every match.
    Regex regex = new Regex(strPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    if (regex.IsMatch(htmlContent))
    {
        foreach (Match hit in regex.Matches(htmlContent))
        {
            // The text captured by this match.
            string value = hit.Value;
        }
    }

    测试解析PNR航班 

    rt编码信息 如: 

    RTAAAAAA                                                                       
     1.MENG/HONG MS 2.ZHANG/DEPING MR 3.ZHANG/MUHAN MS AAAAAA                       
     4.  NH964  W   TU18JUL  PEKHND HK3   0825 1250      SEAME  3 I                 
     5.  NH963  V   MO24JUL  HNDPEK HK3   1720 2010      SEAME  I 3                 
     6.SZX/T SZX/T 0755-82819601/SHENZHEN TIANTAI AIR INTERNATIONAL TRAVEL AGENCY   
        CO.,LTD ABCDEFG                                                             
     7.18912790711                                                                  
     8.18912790711                                                                  
     9.TL/0625/18JUL/SZX000                                                         
    10.SSR ADTK 1E TO NH BY 30JUN 1200 OTHERWISE WILL BE XLD                        
    11.SSR DOCS NH HK1 P/CN/G42027462/CN/13OCT68/F/27APR20/MENG/HONG/P1             
    12.SSR DOCS NH HK1 P/CN/G40834536/CN/08SEP66/M/25MAY20/ZHANG/DEPING/P2          
    13.SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3          +
                                                                                   
                                                                                    
                                                                                    
    PN                                                                             
    14.SSR CTCM NH HK1 18912790729/P3                                              -
    15.OSI NH CTCT18912790729                                                       
    16.RMK TJ AUTH SZV000/T                                                         
    17.RMK 备注信息                                                                 
    18.RMK 1A/M42ROX                                                                
    19.SZX000 
    

      

    解析方法

      /// <summary>
      /// Parses the text of an "RT" PNR display into an <see cref="OrderView"/>: the record
      /// locator, the flight segments, and the passengers (overwritten with SSR DOCS data
      /// when a DOCS line at the same position carries the same name).
      /// </summary>
      /// <param name="pnrtxt">Raw RT response text to parse.</param>
      /// <param name="msg">Set to an error description when parsing fails; not modified on success.</param>
      /// <returns>
      /// The populated order on success. When the record locator, the flights or the passengers
      /// cannot be matched at all, the order built so far is returned and <paramref name="msg"/>
      /// explains why. <c>null</c> is returned when a matched flight or passenger cannot be converted.
      /// </returns>
      public OrderView GetOrderViewByRtPNRTxt(string pnrtxt, ref string msg)
      {
          ILogHandle handle = new ILogHandle(userid, "1E", "SELF", "解析RTPNR");
          DateTime _now = DateTime.Now;
          int restime = 0;

          OrderView result = new OrderView();
          string pnr = "";
          string strreq = pnrtxt;
          string strrsp = "";

          // All patterns below share the same options.
          const RegexOptions rexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant;

          // The record locator is the 6-character token that follows a passenger title.
          // Patterns are verbatim strings: "\s" is not a valid escape in a regular C# string literal.
          Regex rex = new Regex(@"\s*(MR|MS|MRS|MISS)\s*(?<PNR>[a-zA-Z0-9]{6})", rexOptions);
          Match pnrMatch = rex.Match(pnrtxt);
          if (!pnrMatch.Success)
          {
              msg = "未解析到PNR";
              restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
              handle.Error(strreq, strrsp, "未解析到PNR", restime);
              return result;
          }
          pnr = pnrMatch.Groups["PNR"].Value.Trim();

          // ---- Flight segments -------------------------------------------------
          int seq = 1;
          try
          {
              string str_pattern = @"(?<FlightNo>[0-9A-Z]{2}[0-9]{1,4})\s*(?<Cabin>[A-Z0-9]{1,2})\s*(?<FlghtDate>[A-Z]{2}[0-9]{2}[A-Z]{3})\s*(?<FromTo>[A-Z]{6})\s*(?<Statu>[A-Za-z]{2}[0-9]{1})\s*(?<FromDt>[0-9]{4}[+]?[1-9]?)\s*(?<ToDt>[0-9]{4}[+]?[1-9]?)";
              Regex regex = new Regex(str_pattern, rexOptions);
              MatchCollection matchCollection = regex.Matches(pnrtxt);
              if (matchCollection.Count == 0)
              {
                  msg = "未匹配到任何航班信息";
                  restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
                  handle.Error(strreq, strrsp, "未匹配到任何航班信息", restime);
                  return result;
              }

              result.flights = new List<FlightModel>();
              DateTime dtfdate = DateTime.Now; // date of the first segment; assigned on the first iteration

              foreach (Match match in matchCollection)
              {
                  // FlghtDate looks like "TU18JUL": weekday (2), day (2), month abbreviation (3).
                  string FlghtDate = match.Groups["FlghtDate"].Value.Trim();
                  string FromTo = match.Groups["FromTo"].Value.Trim();
                  FlightModel f = new FlightModel();

                  // The text carries no year: assume the current year and roll over to next
                  // year when that date is already past. Compare against today's date (not
                  // the current time) so a flight departing today is not pushed a year ahead.
                  DateTime dttrgar = Convert.ToDateTime(DateTime.Now.Year.ToString() + "-" + MakePnrText.GetMonth(FlghtDate.Substring(4)) + "-" + FlghtDate.Substring(2, 2));
                  if (dttrgar < DateTime.Today)
                  {
                      dttrgar = dttrgar.AddYears(1);
                  }

                  string fdate = dttrgar.ToString("yyyy-MM-dd");
                  if (seq == 1)
                  {
                      dtfdate = Convert.ToDateTime(fdate);
                  }

                  f.flightno = match.Groups["FlightNo"].Value.Trim();
                  f.seat = match.Groups["Cabin"].Value.Trim().Substring(0, 1);
                  f.carrier = f.flightno.Substring(0, 2);
                  f.dept = FromTo.Substring(0, 3);
                  f.depttime = GetDatetime(fdate, match.Groups["FromDt"].Value.Trim());
                  f.arr = FromTo.Substring(3);
                  f.arrtime = GetDatetime(fdate, match.Groups["ToDt"].Value.Trim());
                  f.sailtype = seq;
                  // Segments departing two or more days after the first one get trip type 2, others 1.
                  f.triptype = (f.depttime.Value - dtfdate).TotalDays >= 2 ? 2 : 1;
                  f.optcarrier = f.carrier;
                  f.optflightno = f.flightno;
                  f.state = "Y";
                  result.flights.Add(f);
                  seq++;
              }
          }
          catch (Exception)
          {
              msg = "" + seq + "段航班信息有误,请核实RT编码文本信息";
              restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
              handle.Error(strreq, strrsp, msg, restime);
              return null;
          }

          // ---- Passengers ------------------------------------------------------
          result.passes = new List<PassengerModel>();
          seq = 1;
          DateTime dtnow = DateTime.Now;
          try
          {
              // The PNR is embedded in the pattern, so escape it defensively.
              string str_pass = @"[.]?(?<name>[A-Z]{2,}/[A-Z]{1,}\s?[A-Z]{0,})\s*(?<sex>MR|MS|MRS|MISS)\s*(" + Regex.Escape(pnr) + ")?";
              rex = new Regex(str_pass, rexOptions);
              MatchCollection matches = rex.Matches(pnrtxt);
              if (matches.Count == 0)
              {
                  msg = "未匹配到任何乘客信息";
                  restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
                  handle.Error(strreq, strrsp, "未匹配到任何乘客信息", restime);
                  return result;
              }

              // One generator for the whole loop: separate time-seeded instances created
              // in quick succession can produce identical sequences.
              Random rnd = new Random();
              foreach (Match match in matches)
              {
                  string name = match.Groups["name"].Value.Trim();
                  string sex = match.Groups["sex"].Value.Trim();
                  if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(sex))
                  {
                      msg = "" + seq + "位乘客信息有误,请核实RT编码文本信息";
                      restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
                      handle.Error(strreq, strrsp, msg, restime);
                      return null;
                  }

                  PassengerModel p = new PassengerModel();
                  p.name = name;
                  // The regex is case-insensitive, so the title comparison must be too.
                  p.sex = string.Equals(sex, "MR", StringComparison.OrdinalIgnoreCase) ? "M" : "F";
                  p.phone = "";
                  p.nationality = "CN";
                  // Placeholder document data; replaced below when a matching SSR DOCS line exists.
                  p.birthday = dtnow.AddYears(rnd.Next(-20, -13));
                  p.cardaddress = "CN";
                  p.cardexpire = dtnow.AddYears(rnd.Next(2, 10));
                  p.cardno = "P88888";
                  p.cardtype = "PP";
                  p.ptype = 1;
                  result.passes.Add(p);
                  if (seq == 1)
                  {
                      result.needpassinfo = "1";
                  }
                  seq++;
              }

              // Example line: SSR DOCS NH HK1 P/CN/E81525458/CN/07MAY99/F/19JUL26/ZHANG/MUHAN/P3
              string str_pinfo = @"(.SSR DOCS [0-9A-Z]{2} [0-9A-Z]{2}1 P/)(?<cardaddress>[A-Z]{2})/(?<cardno>[0-9A-Z]{2,})/(?<nationality>[A-Z]{2})/(?<birthday>[0-9A-Z]{7})/(?<sex>[A-Z]{1})/(?<cardexpire>[0-9A-Z]{7})/(?<name>[A-Z]{2,}(/)[A-Z]{1,}\s?[A-Z]{0,})(/H)?/P[0-9]{1,}\s*";
              rex = new Regex(str_pinfo, rexOptions);
              matches = rex.Matches(pnrtxt);

              // DOCS lines are paired with passengers by position (0-based) and applied
              // only when the names agree.
              seq = 0;
              // Two-digit current year, comparable with the two-digit year in the DOCS dates.
              int nowsyear = DateTime.Now.Year % 100;
              foreach (Match match in matches)
              {
                  string birthday = match.Groups["birthday"].Value.Trim();       // ddMMMyy
                  string cardexpire = match.Groups["cardexpire"].Value.Trim();   // ddMMMyy
                  if (result.passes[seq].name == match.Groups["name"].Value.Trim())
                  {
                      // Choose the century per passenger: a two-digit year not later than the
                      // current one is read as 20xx, anything else as 19xx.
                      string yearpart = int.Parse(birthday.Substring(5)) <= nowsyear ? "20" : "19";
                      result.passes[seq].cardaddress = match.Groups["cardaddress"].Value.Trim();
                      result.passes[seq].cardno = match.Groups["cardno"].Value.Trim();
                      result.passes[seq].nationality = match.Groups["nationality"].Value.Trim();
                      result.passes[seq].sex = match.Groups["sex"].Value.Trim();
                      result.passes[seq].birthday = Convert.ToDateTime(yearpart + birthday.Substring(5) + "-" + MakePnrText.GetMonth(birthday.Substring(2, 3)) + "-" + birthday.Substring(0, 2));
                      result.passes[seq].cardexpire = Convert.ToDateTime("20" + cardexpire.Substring(5) + "-" + MakePnrText.GetMonth(cardexpire.Substring(2, 3)) + "-" + cardexpire.Substring(0, 2));
                  }
                  seq++;
              }
          }
          catch (Exception)
          {
              msg = "" + seq + "位乘客信息有误,请核实RT编码文本";
              restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
              handle.Error(strreq, strrsp, msg, restime);
              return null;
          }

          result.extemmsg = pnr;
          restime = Convert.ToInt32((DateTime.Now - _now).TotalMilliseconds);
          handle.Succes(strreq, strrsp, restime);
          return result;
      }
    View Code

    2,HtmlAgilityPack 之 HtmlNode类 (主要是XPath语法解析,firebug插件可以查看对应XPath)

    // HTML text to parse; the literal here is only a placeholder for the real page source.
    string detailContext="html 源码";
    // Load the markup into an HtmlAgilityPack document and take its root node.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(detailContext);
                HtmlNode node = doc.DocumentNode;
                // XPath: every <tr class="line" height="40"> located anywhere under a <table class="tab_result">.
                // NOTE(review): SelectNodes presumably returns null when nothing matches — confirm against
                // the HtmlAgilityPack docs and null-check trlist before iterating.
                HtmlNodeCollection trlist = node.SelectNodes("//table[@class='tab_result']//tr[@class='line'][@height='40']");

    3,Newtonsoft.Json序列化和反序列化

    这里下载:http://www.newtonsoft.com/products/json/ 
     
    // Deserialize the JSON text in `context` into a list of entity objects
    // (Models.实体类 stands for your own entity class).
    var list = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Models.实体类>>(context);
    View Code
     // Reads a JSON document from `stream`, then downloads every image listed under
     // its "imgs" array, reporting progress and results on the UI controls.
     using (StreamReader reader = new StreamReader(stream))
     {
         string jsonData = reader.ReadToEnd();
         // Parse the JSON and pick out the "imgs" array.
         JObject objectRoot = JsonConvert.DeserializeObject(jsonData) as JObject;
         JArray imgsArray = objectRoot["imgs"] as JArray;
         for (int i = 0; i < imgsArray.Count; i++)
         {
             JObject img = imgsArray[i] as JObject;
             string objUrl = (string)img["objURL"];
             try
             {
                 // Download this one image.
                 DownloadImage(objUrl);

                 // Compute the percentage now and capture the copy: the `for` variable `i`
                 // is shared by all iterations, so a lambda that reads `i` when BeginInvoke
                 // eventually runs it may see a later value.
                 int percent = i * 100 / sumCount;
                 progressBar.BeginInvoke(new Action(() =>
                     {
                         progressBar.Value = percent;
                     }));

                 // Append the downloaded URL to the log text box.
                 txtLogs.BeginInvoke(new Action(() =>
                     {
                         txtLogs.AppendText("已下载:" + objUrl + Environment.NewLine);
                     }));
             }
             catch (Exception ex)
             {
                 // Marshal to the UI thread before touching txtLogs.
                 txtLogs.BeginInvoke(new Action(() =>
                     {
                         txtLogs.AppendText("【异常:" + ex.Message + "" + Environment.NewLine);
                     }));
             }
         }
     }
    View Code
    // Extracts the piece count that precedes "件" in a baggage-allowance description.
    // The digit class must be [0-9]: the previous "[d]{1,}" matched the literal letter
    // 'd', so the pattern never matched text such as "1件".
    Regex _rexPC = new Regex(@"([0-9]+)件");
    string str="成都-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-万象,1件,每件23KG,长宽高100*60*40CM.万象-昆明,1件,每件23KG,长宽高100*60*40CM;昆明-成都,1件,每件23KG,长宽高100*60*40CM";
    // Only the first occurrence is read; Match() + Success avoids scanning the input twice.
    Match _mch = _rexPC.Match(str);
    if (_mch.Success)
    {
        // Group 1 is the digit run; StrToInt is called with 0 as its second argument.
        int adtpc = StringHelper.StrToInt(_mch.Groups[1].Value, 0);
    }
    View Code
    有时候获取json数据要解析时需手动写实体类,之前一直手写,感觉太浪费时间了,后面找到了一款工具,可以实现转换功能。
     
  • 相关阅读:
    Dreamweaver CS4无法启动:xml parsing fatal error..Designer.xml错误解决方法
    strcpy() strcat() strcmp() gets() puts()
    使用友元,编译出错fatal error C1001: INTERNAL COMPILER ERROR (compiler file 'msc1.cpp', line 1786) 的解决
    HashMap按key排序
    转Oracle数据类型及存储方式【E】
    JAVA_java.util.Date与java.sql.Date相互转换
    Oracle_复制表跟往表插数据
    java_Struts学习例子
    ORA01033: ORACLE initialization or shutdown in progressORACLE
    dorado勾选修改的时候总是选择第一条记录解决办法.
  • 原文地址:https://www.cnblogs.com/systemkk/p/4227004.html
Copyright © 2011-2022 走看看