zoukankan      html  css  js  c++  java
  • 从IP138.com上获取手机号段2.0版

      前段时间发现从ip138.com上基本抓取不到手机号段信息了。仔细研究后发现,页面输出的内容发生了变化,而且同一内容还会随机地以不同的页面结构输出,可能是为了防止别人用程序抓取号段信息。

      用最笨的方法——逐一研究页面输出的内容,发现了其中的规律。与以前发布的Java获取代码相比,现在取消了原来的model类,直接用String数组返回相关内容,使用起来更方便。

      现在可以抓取到手机号段信息的Java代码如下:

    View Code
      1 /**
      2      * 查询手机号段,返回String数组,查询不到返回null
      3      * String[0]:省份,String[1]:城市,String[2]:手机号类型
      4      * @param smsmobile 需要查询的手机号,最少7位
      5      * @return String数组或null
      6      */
      7     public String[] GetMobileMark(String smsmobile)
      8     {
      9          
     10          //请求URL   
     11         String REQUEST_URL="http://www.ip138.com:8080/search.asp";   
     12         //请求方法   
     13         String REQUEST_MOTHOD="POST"
     14          
     15         BufferedReader br = null
     16         
     17         String[] mobileMarkInfos = new String[3];             
     18         try
     19         {
     20             HttpURLConnection httpConn=(HttpURLConnection)new URL(REQUEST_URL).openConnection();   
     21             
     22             httpConn.setRequestMethod(REQUEST_MOTHOD);   
     23              
     24             httpConn.setDoOutput(true);
     25             
     26             String requestParameter = "mobile="+smsmobile+"&action=mobile";            
     27             httpConn.getOutputStream().write(requestParameter.getBytes());           
     28             httpConn.getOutputStream().flush();
     29             httpConn.getOutputStream().close();
     30             
     31             br = new BufferedReader(new InputStreamReader(httpConn.getInputStream(),"GBK"));
     32             
     33             String lineStr = null;
     34             
     35             while((lineStr = br.readLine())!=null)   
     36             {                
     37                 lineStr = lineStr.trim();    
     38                 if (lineStr.indexOf("卡号归属地"!= -1) { 
     39                     if (lineStr.indexOf("noswap"!= -1) {
     40                         lineStr = br.readLine().trim().replace("<!-- <td></td> -->""");
     41                         Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
     42                         Matcher m = p.matcher(lineStr);
     43                         if (m.matches()) {
     44                             String mobileArea = m.group(1);
     45                             String[] areas = mobileArea.split("&nbsp;"2);
     46                             if(areas.length>1)
     47                             {                             
     48                                 mobileMarkInfos[0= areas[0];
     49                                 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
     50                                     mobileMarkInfos[0= "内蒙古";
     51                                 }
     52                                 mobileMarkInfos[1= areas[1];
     53                             }
     54                             else {
     55                                 return null;
     56                             }   
     57                         }
     58                     } else if (lineStr.indexOf("<!-- <td width=\"130\" align=\"center\">卡号归属地</TD> -->"!= -1) {
     59                         lineStr = br.readLine().trim();
     60                         Pattern p = Pattern.compile("^<TD class=\"tdc2\" align=\"center\">(.*)</TD>$");
     61                         Matcher m = p.matcher(lineStr);
     62                         if (m.matches()) {
     63                             String mobileArea = m.group(1);
     64                             String[] areas = mobileArea.split("&nbsp;"2);
     65                             if(areas.length>1)
     66                             {                             
     67                                 mobileMarkInfos[0= areas[0];
     68                                 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
     69                                     mobileMarkInfos[0= "内蒙古";
     70                                 }
     71                                 mobileMarkInfos[1= areas[1];
     72                             }
     73                             else {
     74                                 return null;
     75                             }   
     76                         }
     77 
     78                     } 
     79                     else {
     80                         lineStr = lineStr.replace("<TD width=\"130\" align=\"center\">卡号归属地</TD>","");
     81                         lineStr = lineStr.replace("<!-- <td></td> -->""");
     82                         Pattern p = Pattern.compile("^<td align=\"center\" class=tdc2>(.*)</TD>$");
     83                         Matcher m = p.matcher(lineStr);
     84                         if (m.matches()) {
     85                             String mobileArea = m.group(1);
     86                             String[] areas = mobileArea.split("&nbsp;"2);
     87                             if(areas.length>1)
     88                             {                             
     89                                 mobileMarkInfos[0= areas[0];
     90                                 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
     91                                     mobileMarkInfos[0= "内蒙古";
     92                                 }
     93                                 mobileMarkInfos[1= areas[1];
     94                             }
     95                             else {
     96                                 return null;
     97                             }   
     98                         }
     99                     }                       
    100                       
    101                 }
    102                 if (lineStr.indexOf("卡&nbsp;类&nbsp;型"!= -1) {
    103                     if (lineStr.indexOf("'tdc2'"!= -1) {
    104 
    105                         lineStr = lineStr.replace("<!-- <td width=\"130\" align=\"center\" noswap></td> --><TD width=\"130\" align=\"center\" noswap>卡&nbsp;类&nbsp;型</td>","");
    106                         Pattern p = Pattern.compile("^<td align=\"center\" class='tdc2'>(.*)</TD>$");
    107                         Matcher m = p.matcher(lineStr);
    108                         if (m.matches()) {
    109                             String cardType = m.group(1);
    110                             mobileMarkInfos[2= cardType; 
    111                             break;
    112                         }
    113                     } else if (lineStr.indexOf("class=tdc2"!= -1) {
    114                         lineStr = lineStr.replace("<TD width=\"130\" align=\"center\" noswap>卡&nbsp;类&nbsp;型</TD>",    "");
    115                         lineStr = lineStr.replace("<!-- <td></td> -->""");
    116                         Pattern p = Pattern.compile("^<TD align=\"center\" class=tdc2>(.*)</TD>$");
    117                         Matcher m = p.matcher(lineStr);
    118                         if (m.matches()) {
    119                             String cardType = m.group(1);
    120                             mobileMarkInfos[2= cardType; 
    121                             break;
    122                         }
    123                     } else {
    124                         lineStr = br.readLine().trim();
    125                         Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
    126                         Matcher m = p.matcher(lineStr);
    127                         if (m.matches()) {
    128                             String cardType = m.group(1);
    129                             mobileMarkInfos[2= cardType; 
    130                             break;
    131                         }
    132                     }
    133                 }
    134                  
    135             }
    136             if(mobileMarkInfos[0].length() == 0){
    137                 return null;
    138             }
    139             return mobileMarkInfos;
    140         }
    141         catch(Exception e)
    142         {
    143             //System.out.println();
    144             Log.writeLog("GetMobileMark Error : " + e.toString());
    145             return null;
    146         }    
    147     }

      使用示例如下:

     1 String smsmobile = "1340100";
     2 String[] contents = GetMobileMark(smsmobile);                    
     3 if(contents != null ) {                        
     4    String province = "未知";
     5    String city = "";
     6    String memo = "";              
     7                      
     8    try
     9    {
    10       if(contents[0].equalsIgnoreCase(province))
    11       {
    12          continue;
    13       }
    14       else {
    15          province = contents[0];
    16       }                             
    17       city = contents[1];                         
    18       memo = contents[2];                            
    19                              
    20    }
    21    catch(Exception e)
    22    {
    23       Log.writeLog("Error : " + e.toString());
    24    }
    25}
  • 相关阅读:
    2021.3.16
    2021.3.15
    通过指定的URL获取返回图片的BASE64编码
    Redis系统学习之缓存穿透,缓存击穿,缓存雪崩的概念及其解决方案
    Redis系统学习之其他高可用模型
    Redis系统学习之哨兵模式
    Redis系统学习之主从复制
    Redis系统学习之发布订阅
    Redis系统学习之持久化(AOF)
    Redis系统学习之持久化(RDB)
  • 原文地址:https://www.cnblogs.com/zsxfbj/p/ip138_V2.html
Copyright © 2011-2022 走看看