前段时间发现 ip138.com 上基本抓取不到手机号段信息了。仔细研究后发现,是页面输出的内容发生了变化,而且页面还会随机地输出不同形式的内容,可能是为了防止别人用程序抓取号段信息。
用最笨的方法——研究页面输出内容,发现了其中的规律。与以前发布的 Java 获取代码相比,现在取消了原来的 model 类,直接用 String 数组返回相关内容,可用性更强。
现在可以抓取到手机号段信息的Java代码如下:
View Code
1 /**
2 * 查询手机号段,返回String数组,查询不到返回null
3 * String[0]:省份,String[1]:城市,String[2]:手机号类型
4 * @param smsmobile 需要查询的手机号,最少7位
5 * @return String数组或null
6 */
7 public String[] GetMobileMark(String smsmobile)
8 {
9
10 //请求URL
11 String REQUEST_URL="http://www.ip138.com:8080/search.asp";
12 //请求方法
13 String REQUEST_MOTHOD="POST";
14
15 BufferedReader br = null;
16
17 String[] mobileMarkInfos = new String[3];
18 try
19 {
20 HttpURLConnection httpConn=(HttpURLConnection)new URL(REQUEST_URL).openConnection();
21
22 httpConn.setRequestMethod(REQUEST_MOTHOD);
23
24 httpConn.setDoOutput(true);
25
26 String requestParameter = "mobile="+smsmobile+"&action=mobile";
27 httpConn.getOutputStream().write(requestParameter.getBytes());
28 httpConn.getOutputStream().flush();
29 httpConn.getOutputStream().close();
30
31 br = new BufferedReader(new InputStreamReader(httpConn.getInputStream(),"GBK"));
32
33 String lineStr = null;
34
35 while((lineStr = br.readLine())!=null)
36 {
37 lineStr = lineStr.trim();
38 if (lineStr.indexOf("卡号归属地") != -1) {
39 if (lineStr.indexOf("noswap") != -1) {
40 lineStr = br.readLine().trim().replace("<!-- <td></td> -->", "");
41 Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
42 Matcher m = p.matcher(lineStr);
43 if (m.matches()) {
44 String mobileArea = m.group(1);
45 String[] areas = mobileArea.split(" ", 2);
46 if(areas.length>1)
47 {
48 mobileMarkInfos[0] = areas[0];
49 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
50 mobileMarkInfos[0] = "内蒙古";
51 }
52 mobileMarkInfos[1] = areas[1];
53 }
54 else {
55 return null;
56 }
57 }
58 } else if (lineStr.indexOf("<!-- <td width=\"130\" align=\"center\">卡号归属地</TD> -->") != -1) {
59 lineStr = br.readLine().trim();
60 Pattern p = Pattern.compile("^<TD class=\"tdc2\" align=\"center\">(.*)</TD>$");
61 Matcher m = p.matcher(lineStr);
62 if (m.matches()) {
63 String mobileArea = m.group(1);
64 String[] areas = mobileArea.split(" ", 2);
65 if(areas.length>1)
66 {
67 mobileMarkInfos[0] = areas[0];
68 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
69 mobileMarkInfos[0] = "内蒙古";
70 }
71 mobileMarkInfos[1] = areas[1];
72 }
73 else {
74 return null;
75 }
76 }
77
78 }
79 else {
80 lineStr = lineStr.replace("<TD width=\"130\" align=\"center\">卡号归属地</TD>","");
81 lineStr = lineStr.replace("<!-- <td></td> -->", "");
82 Pattern p = Pattern.compile("^<td align=\"center\" class=tdc2>(.*)</TD>$");
83 Matcher m = p.matcher(lineStr);
84 if (m.matches()) {
85 String mobileArea = m.group(1);
86 String[] areas = mobileArea.split(" ", 2);
87 if(areas.length>1)
88 {
89 mobileMarkInfos[0] = areas[0];
90 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
91 mobileMarkInfos[0] = "内蒙古";
92 }
93 mobileMarkInfos[1] = areas[1];
94 }
95 else {
96 return null;
97 }
98 }
99 }
100
101 }
102 if (lineStr.indexOf("卡 类 型") != -1) {
103 if (lineStr.indexOf("'tdc2'") != -1) {
104
105 lineStr = lineStr.replace("<!-- <td width=\"130\" align=\"center\" noswap></td> --><TD width=\"130\" align=\"center\" noswap>卡 类 型</td>","");
106 Pattern p = Pattern.compile("^<td align=\"center\" class='tdc2'>(.*)</TD>$");
107 Matcher m = p.matcher(lineStr);
108 if (m.matches()) {
109 String cardType = m.group(1);
110 mobileMarkInfos[2] = cardType;
111 break;
112 }
113 } else if (lineStr.indexOf("class=tdc2") != -1) {
114 lineStr = lineStr.replace("<TD width=\"130\" align=\"center\" noswap>卡 类 型</TD>", "");
115 lineStr = lineStr.replace("<!-- <td></td> -->", "");
116 Pattern p = Pattern.compile("^<TD align=\"center\" class=tdc2>(.*)</TD>$");
117 Matcher m = p.matcher(lineStr);
118 if (m.matches()) {
119 String cardType = m.group(1);
120 mobileMarkInfos[2] = cardType;
121 break;
122 }
123 } else {
124 lineStr = br.readLine().trim();
125 Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
126 Matcher m = p.matcher(lineStr);
127 if (m.matches()) {
128 String cardType = m.group(1);
129 mobileMarkInfos[2] = cardType;
130 break;
131 }
132 }
133 }
134
135 }
136 if(mobileMarkInfos[0].length() == 0){
137 return null;
138 }
139 return mobileMarkInfos;
140 }
141 catch(Exception e)
142 {
143 //System.out.println();
144 Log.writeLog("GetMobileMark Error : " + e.toString());
145 return null;
146 }
147 }
2 * 查询手机号段,返回String数组,查询不到返回null
3 * String[0]:省份,String[1]:城市,String[2]:手机号类型
4 * @param smsmobile 需要查询的手机号,最少7位
5 * @return String数组或null
6 */
7 public String[] GetMobileMark(String smsmobile)
8 {
9
10 //请求URL
11 String REQUEST_URL="http://www.ip138.com:8080/search.asp";
12 //请求方法
13 String REQUEST_MOTHOD="POST";
14
15 BufferedReader br = null;
16
17 String[] mobileMarkInfos = new String[3];
18 try
19 {
20 HttpURLConnection httpConn=(HttpURLConnection)new URL(REQUEST_URL).openConnection();
21
22 httpConn.setRequestMethod(REQUEST_MOTHOD);
23
24 httpConn.setDoOutput(true);
25
26 String requestParameter = "mobile="+smsmobile+"&action=mobile";
27 httpConn.getOutputStream().write(requestParameter.getBytes());
28 httpConn.getOutputStream().flush();
29 httpConn.getOutputStream().close();
30
31 br = new BufferedReader(new InputStreamReader(httpConn.getInputStream(),"GBK"));
32
33 String lineStr = null;
34
35 while((lineStr = br.readLine())!=null)
36 {
37 lineStr = lineStr.trim();
38 if (lineStr.indexOf("卡号归属地") != -1) {
39 if (lineStr.indexOf("noswap") != -1) {
40 lineStr = br.readLine().trim().replace("<!-- <td></td> -->", "");
41 Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
42 Matcher m = p.matcher(lineStr);
43 if (m.matches()) {
44 String mobileArea = m.group(1);
45 String[] areas = mobileArea.split(" ", 2);
46 if(areas.length>1)
47 {
48 mobileMarkInfos[0] = areas[0];
49 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
50 mobileMarkInfos[0] = "内蒙古";
51 }
52 mobileMarkInfos[1] = areas[1];
53 }
54 else {
55 return null;
56 }
57 }
58 } else if (lineStr.indexOf("<!-- <td width=\"130\" align=\"center\">卡号归属地</TD> -->") != -1) {
59 lineStr = br.readLine().trim();
60 Pattern p = Pattern.compile("^<TD class=\"tdc2\" align=\"center\">(.*)</TD>$");
61 Matcher m = p.matcher(lineStr);
62 if (m.matches()) {
63 String mobileArea = m.group(1);
64 String[] areas = mobileArea.split(" ", 2);
65 if(areas.length>1)
66 {
67 mobileMarkInfos[0] = areas[0];
68 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
69 mobileMarkInfos[0] = "内蒙古";
70 }
71 mobileMarkInfos[1] = areas[1];
72 }
73 else {
74 return null;
75 }
76 }
77
78 }
79 else {
80 lineStr = lineStr.replace("<TD width=\"130\" align=\"center\">卡号归属地</TD>","");
81 lineStr = lineStr.replace("<!-- <td></td> -->", "");
82 Pattern p = Pattern.compile("^<td align=\"center\" class=tdc2>(.*)</TD>$");
83 Matcher m = p.matcher(lineStr);
84 if (m.matches()) {
85 String mobileArea = m.group(1);
86 String[] areas = mobileArea.split(" ", 2);
87 if(areas.length>1)
88 {
89 mobileMarkInfos[0] = areas[0];
90 if(mobileMarkInfos[0].equalsIgnoreCase("内蒙")) {
91 mobileMarkInfos[0] = "内蒙古";
92 }
93 mobileMarkInfos[1] = areas[1];
94 }
95 else {
96 return null;
97 }
98 }
99 }
100
101 }
102 if (lineStr.indexOf("卡 类 型") != -1) {
103 if (lineStr.indexOf("'tdc2'") != -1) {
104
105 lineStr = lineStr.replace("<!-- <td width=\"130\" align=\"center\" noswap></td> --><TD width=\"130\" align=\"center\" noswap>卡 类 型</td>","");
106 Pattern p = Pattern.compile("^<td align=\"center\" class='tdc2'>(.*)</TD>$");
107 Matcher m = p.matcher(lineStr);
108 if (m.matches()) {
109 String cardType = m.group(1);
110 mobileMarkInfos[2] = cardType;
111 break;
112 }
113 } else if (lineStr.indexOf("class=tdc2") != -1) {
114 lineStr = lineStr.replace("<TD width=\"130\" align=\"center\" noswap>卡 类 型</TD>", "");
115 lineStr = lineStr.replace("<!-- <td></td> -->", "");
116 Pattern p = Pattern.compile("^<TD align=\"center\" class=tdc2>(.*)</TD>$");
117 Matcher m = p.matcher(lineStr);
118 if (m.matches()) {
119 String cardType = m.group(1);
120 mobileMarkInfos[2] = cardType;
121 break;
122 }
123 } else {
124 lineStr = br.readLine().trim();
125 Pattern p = Pattern.compile("^<TD width=\\* align=\"center\" class=tdc2>(.*)</TD>$");
126 Matcher m = p.matcher(lineStr);
127 if (m.matches()) {
128 String cardType = m.group(1);
129 mobileMarkInfos[2] = cardType;
130 break;
131 }
132 }
133 }
134
135 }
136 if(mobileMarkInfos[0].length() == 0){
137 return null;
138 }
139 return mobileMarkInfos;
140 }
141 catch(Exception e)
142 {
143 //System.out.println();
144 Log.writeLog("GetMobileMark Error : " + e.toString());
145 return null;
146 }
147 }
使用示例如下:
1 String smsmobile = "1340100";
2 String[] contents = GetMobileMark(smsmobile);
3 if(contents != null ) {
4 String province = "未知";
5 String city = "";
6 String memo = "";
7
8 try
9 {
10 if(contents[0].equalsIgnoreCase(province))
11 {
12 continue;
13 }
14 else {
15 province = contents[0];
16 }
17 city = contents[1];
18 memo = contents[2];
19
20 }
21 catch(Exception e)
22 {
23 Log.writeLog("Error : " + e.toString());
24 }
25}
2 String[] contents = GetMobileMark(smsmobile);
3 if(contents != null ) {
4 String province = "未知";
5 String city = "";
6 String memo = "";
7
8 try
9 {
10 if(contents[0].equalsIgnoreCase(province))
11 {
12 continue;
13 }
14 else {
15 province = contents[0];
16 }
17 city = contents[1];
18 memo = contents[2];
19
20 }
21 catch(Exception e)
22 {
23 Log.writeLog("Error : " + e.toString());
24 }
25}