zoukankan      html  css  js  c++  java
  • 代理抓取页面,获得访问地址的最终跳转地址 悟寰轩

     1 <%@page import="java.net.URI"%>
     2 <%@page import="java.io.IOException"%>
     3 <%@page import="org.apache.http.HttpHost"%>
     4 <%@page import="org.apache.http.HttpResponse"%>
     5 <%@page import="org.apache.http.HttpStatus"%>
     6 <%@page import="org.apache.http.client.ClientProtocolException"%>
     7 <%@page import="org.apache.http.client.methods.HttpGet"%>
     8 <%@page import="org.apache.http.client.methods.HttpUriRequest"%>
     9 <%@page import="org.apache.http.impl.client.DefaultHttpClient"%>
    10 <%@page import="org.apache.http.protocol.BasicHttpContext"%>
    11 <%@page import="org.apache.http.protocol.ExecutionContext"%>
    12 <%@page import="org.apache.http.protocol.HttpContext"%>
    13 <%@page import="org.apache.http.client.utils.URLEncodedUtils"%>
    14 <%@page import="java.net.URLEncoder"%>
    15 <%@page import="java.io.UnsupportedEncodingException"%>
    16 <%@page import="org.apache.http.impl.client.DefaultRedirectHandler"%>
    17 <%@page import="org.apache.http.ProtocolException"%>
    18 <%@page import="org.apache.http.Header"%>
    19 <%@page import="java.net.URISyntaxException"%>
    20 <%@ taglib uri="http://java.sun.com/jstl/core" prefix="c" %>
    21 <%@ taglib uri="http://www.duxiu.com/proxy" prefix="proxy" %>
    22 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <%!
    /**
     * Redirect handler that rewrites certain relative redirect targets before
     * HttpClient follows them.
     *
     * A Location value is rewritten when it does not start with "http" and
     * contains "---". In that case the response's Location header is replaced
     * with a prefixed copy, and the URI returned to HttpClient is the prefixed
     * location with everything after the first "url=" marker URL-encoded, so the
     * nested target survives as a single query value. All other redirects are
     * resolved by {@link DefaultRedirectHandler}.
     */
    static class CustomRedirectHandler extends DefaultRedirectHandler {

        /** Marker that introduces the nested target inside the Location value. */
        private static final String URL_PARAM = "url=";

        /**
         * Resolves the URI HttpClient should follow for a redirect response.
         *
         * @param response the redirect response received from the server
         * @param context  the execution context of the current request
         * @return the URI to follow
         * @throws ProtocolException if the rewritten location is not a valid URI,
         *                           or if the default handler rejects the response
         */
        @Override
        public URI getLocationURI(HttpResponse response, HttpContext context) throws ProtocolException {
            if (isRedirectRequested(response, context)) {
                // A 3xx response is not guaranteed to carry a Location header; when it
                // is missing, fall through so the default handler reports the error.
                Header locationHeader = response.getFirstHeader("location");
                String location = (locationHeader == null) ? null : locationHeader.getValue();
                if (location != null && !location.startsWith("http") && location.contains("---")) {
                    response.removeHeaders("location");
                    response.setHeader("location", "-----" + location);
                    return buildRewrittenUri(location);
                }
            }
            return super.getLocationURI(response, context);
        }

        /**
         * Builds the prefixed URI for a rewritten location, encoding the value that
         * follows the first "url=" marker. When the marker is absent there is
         * nothing to encode and the location is only prefixed.
         */
        private URI buildRewrittenUri(String location) throws ProtocolException {
            String target;
            // Use one split point for both halves so no part of the location is duplicated.
            int marker = location.indexOf(URL_PARAM);
            if (marker < 0) {
                target = "------" + location;
            } else {
                int valueStart = marker + URL_PARAM.length();
                try {
                    target = "------" + location.substring(0, valueStart)
                            + URLEncoder.encode(location.substring(valueStart), "UTF-8");
                } catch (UnsupportedEncodingException e) {
                    // UTF-8 is mandatory on every JVM; surface it as a protocol failure anyway.
                    throw new ProtocolException("UTF-8 encoding is not available", e);
                }
            }
            try {
                return new URI(target);
            } catch (URISyntaxException e) {
                // Returning null here would only defer the failure into HttpClient.
                throw new ProtocolException("Invalid redirect location: " + target, e);
            }
        }
    }
    %>
    <%!
    /**
     * Fetches {@code url} with HttpClient, following redirects through
     * {@link CustomRedirectHandler}, and returns the URL of the last request
     * that was actually executed.
     *
     * A non-200 final status is reported on standard error but does not prevent
     * the final URL from being returned.
     *
     * @param url the address to fetch
     * @return the absolute URL of the final request after redirects
     * @throws IllegalStateException if the request cannot be executed
     */
    public String test1(String url) {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            httpClient.setRedirectHandler(new CustomRedirectHandler());
            HttpContext context = new BasicHttpContext();
            HttpResponse response;
            try {
                response = httpClient.execute(new HttpGet(url), context);
            } catch (IOException e) {
                // ClientProtocolException is an IOException too. Fail with the cause
                // attached instead of continuing with a null response.
                throw new IllegalStateException("Failed to fetch " + url, e);
            }
            if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                System.err.println("Unexpected response for " + url + ": " + response.getStatusLine());
            }
            // After redirects the context holds the last request and the host it was sent to.
            HttpUriRequest currentReq = (HttpUriRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
            HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
            URI currentUri = currentReq.getURI();
            // The request URI may be relative to the target host; make it absolute.
            return currentUri.isAbsolute() ? currentUri.toString() : (currentHost.toURI() + currentUri);
        } finally {
            // Release the connections held by this per-call client.
            httpClient.getConnectionManager().shutdown();
        }
    }
    %>
    <%
    // Entry point: read the "dx" request parameter, resolve where it finally
    // redirects to, and send the browser there when it differs from the input.
    String dx = request.getParameter("dx");
    if (dx == null || "".equals(dx)) {
        out.println("dx为空!");
        return;
    }
    // NOTE(review): dx is caller-controlled, fetched server-side and then used as a
    // redirect target. Consider validating it against an allow-list of hosts.
    String url = dx;
    // Escape before echoing so the parameter cannot inject markup into the page.
    String safeUrl = url.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\"", "&quot;")
            .replace("'", "&#39;");
    out.println("url=" + safeUrl);
    // Resolve the address the request finally lands on after all redirects.
    String finalURL = test1(url);
    if (!url.equals(finalURL)) {
        // Redirect to the resolved address, not to a literal placeholder string.
        response.sendRedirect(finalURL);
    }
    %>

    继承 DefaultRedirectHandler,重写获取重定向 URI 的 getLocationURI 方法。

  • 相关阅读:
    rabbitmq 学习6rabbitmq基础
    使用Sqlserver事务发布实现数据同步
    rabbitmq 学习2安装
    window server 2003 下安装squid
    rabbitmq 学习3初试1
    rabbitmq 学习5server管理
    RabbitMQ: high performance messaging solution
    AMQP和RabbitMQ入门
    rabbitmq 学习1AMQP介绍
    [解题报告]11689 Soda Surpler
  • 原文地址:https://www.cnblogs.com/sunxucool/p/2821913.html
Copyright © 2011-2022 走看看