zoukankan      html  css  js  c++  java
  • 代理抓取页面,获得访问地址的最终跳转地址 悟寰轩

     1 <%@page import="java.net.URI"%>
     2 <%@page import="java.io.IOException"%>
     3 <%@page import="org.apache.http.HttpHost"%>
     4 <%@page import="org.apache.http.HttpResponse"%>
     5 <%@page import="org.apache.http.HttpStatus"%>
     6 <%@page import="org.apache.http.client.ClientProtocolException"%>
     7 <%@page import="org.apache.http.client.methods.HttpGet"%>
     8 <%@page import="org.apache.http.client.methods.HttpUriRequest"%>
     9 <%@page import="org.apache.http.impl.client.DefaultHttpClient"%>
    10 <%@page import="org.apache.http.protocol.BasicHttpContext"%>
    11 <%@page import="org.apache.http.protocol.ExecutionContext"%>
    12 <%@page import="org.apache.http.protocol.HttpContext"%>
    13 <%@page import="org.apache.http.client.utils.URLEncodedUtils"%>
    14 <%@page import="java.net.URLEncoder"%>
    15 <%@page import="java.io.UnsupportedEncodingException"%>
    16 <%@page import="org.apache.http.impl.client.DefaultRedirectHandler"%>
    17 <%@page import="org.apache.http.ProtocolException"%>
    18 <%@page import="org.apache.http.Header"%>
    19 <%@page import="java.net.URISyntaxException"%>
    20 <%@ taglib uri="http://java.sun.com/jstl/core" prefix="c" %>
    21 <%@ taglib uri="http://www.duxiu.com/proxy" prefix="proxy" %>
    22 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    23 <%!
    24  class CustomRedirectHandler extends DefaultRedirectHandler {
    25     @Override
    26     public URI getLocationURI(HttpResponse response, HttpContext context) throws ProtocolException{
    27         if(isRedirectRequested( response, context))
    28         {
    29             Header locationHeader = response.getFirstHeader("location");
    30             String location= locationHeader.getValue();
    31             if(location!=null&&!"".equals(location)&&!location.startsWith("http")&&location.contains("---")){
    32                 response.removeHeaders("location");
    33                 response.setHeader("location","-----"+location);
    34                 URI uri=null;
    35                 try {
    36                     uri =  new URI("------"+location.substring(0, location.lastIndexOf("url=") + 4)
    37                             + URLEncoder.encode(location.substring(location.indexOf("url=") + 4, location.length())));
    38                 } catch (URISyntaxException e) {
    39                     e.printStackTrace();
    40                 }
    41                 return uri;
    42              }
    43         }
    44         return super.getLocationURI(response,context);
    45         
    46     }
    47 }
    48 %>
    <%!
    /**
     * Fetches {@code url} with an HTTP GET, following redirects through
     * {@code CustomRedirectHandler}, and returns the address of the last
     * request that was actually executed.
     *
     * <p>A final response whose status is not 200 is reported on
     * {@code System.err} but does not prevent the address from being returned.
     *
     * @param url the address to fetch
     * @return the absolute address of the final request; when the final request
     *         URI is relative it is prefixed with the final target host
     * @throws IllegalStateException if the request fails with an I/O or
     *                               protocol error
     */
    public String test1(String url) {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        httpClient.setRedirectHandler(new CustomRedirectHandler());
        HttpContext context = new BasicHttpContext();
        try {
            HttpResponse response = httpClient.execute(new HttpGet(url), context);
            if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                // Best effort: the final address is still useful to the caller.
                System.err.println("test1: non-OK final response for " + url
                        + ": " + response.getStatusLine());
            }
            // After execution the context holds the last request and the host it was sent to.
            HttpUriRequest currentReq = (HttpUriRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
            HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
            URI currentUri = currentReq.getURI();
            return currentUri.isAbsolute()
                    ? currentUri.toString()
                    : (currentHost.toURI() + currentUri);
        } catch (IOException e) {
            // ClientProtocolException is an IOException; fail loudly instead of
            // continuing with a null response.
            throw new IllegalStateException("Failed to fetch " + url, e);
        } finally {
            // Release the pooled connections held by this one-shot client.
            httpClient.getConnectionManager().shutdown();
        }
    }
    %>
    74 <%
    75 String dx = request.getParameter("dx");
    76 if(dx==null||"".equals(dx))
    77 {
    78     out.println("dx为空!");
    79     return;
    80 }
    81 // 获得最终访问地址
    82 String url =dx;
    83 out.println("url="+url);
    84 String finalURL=test1(url);
    85 //out.println("finalURL="+finalURL);
    86 if(!url.equals(finalURL)){
    87   response.sendRedirect("最终跳转地址");
    88 }
    89 %>

    继承 DefaultRedirectHandler,重写 getLocationURI 方法,以获得访问地址的最终跳转地址。

  • 相关阅读:
    .net core 3.1 使用Redis缓存
    JavaSE 高级 第11节 缓冲输入输出字节流
    JavaSE 高级 第10节 字节数组输出流ByteArrayOutputStream
    JavaSE 高级 第09节 字节数组输入流ByteArrayInputStream
    JavaSE 高级 第08节 文件输出流FileOutputStream
    JavaSE 高级 第07节 文件输入流FileInputStream
    JavaSE 高级 第06节 初识I、O流
    JavaSE 高级 第05节 日期类与格式化
    JavaSE 高级 第04节 StringBuffer类
    JavaSE 高级 第03节 Math类与猜数字游戏
  • 原文地址:https://www.cnblogs.com/sunxucool/p/2821913.html
Copyright © 2011-2022 走看看