zoukankan      html  css  js  c++  java
  • 【网络爬虫】HttpClient 4.x 中使用 HTTPS 的方法采集 12306 网站

    HttpClient请求https的实例:

    package train;
    
    import java.io.IOException;
    import java.security.NoSuchAlgorithmException;
    import java.security.cert.CertificateException;
    import java.security.cert.X509Certificate;
    
    import javax.net.ssl.SSLContext;
    import javax.net.ssl.TrustManager;
    import javax.net.ssl.X509TrustManager;
    
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.ResponseHandler;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.conn.ClientConnectionManager;
    import org.apache.http.conn.scheme.Scheme;
    import org.apache.http.conn.scheme.SchemeRegistry;
    import org.apache.http.conn.ssl.SSLSocketFactory;
    import org.apache.http.impl.client.BasicResponseHandler;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.params.HttpParams;
    
    /**
     * Demo program: issues an HTTPS GET against a 12306 train-query URL using
     * Apache HttpClient 4.x and prints the response body to standard output.
     *
     * <p><b>Security warning:</b> the client is deliberately configured with a
     * trust manager that accepts every certificate chain without validation.
     * That makes the connection vulnerable to man-in-the-middle attacks; it is
     * acceptable only for a throw-away crawler demo and must not be copied into
     * production code. For real use, import the server's CA into a trust store
     * instead.
     */
    public class train {

        /**
         * Registers a trust-all "https" scheme on a fresh {@link DefaultHttpClient},
         * performs the GET request, and prints the request URL followed by the
         * response body. Any failure is reported via a stack trace on standard
         * error; the connection manager is always shut down before returning.
         *
         * @param args command-line arguments (ignored)
         */
        public static void main(String args[]) {

            // Created outside the try so the finally block can release its connections.
            HttpClient httpclient = new DefaultHttpClient();
            try {
                // "TLS" is the current standard protocol name; "SSL" is a legacy name.
                SSLContext ctx = SSLContext.getInstance("TLS");

                // WARNING: trust-all manager -- performs NO certificate validation.
                X509TrustManager tm = new X509TrustManager() {

                    @Override
                    public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                        // Intentionally empty: every client certificate chain is accepted.
                    }

                    @Override
                    public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                        // Intentionally empty: every server certificate chain is accepted.
                    }

                    @Override
                    public X509Certificate[] getAcceptedIssuers() {
                        // The interface contract requires a non-null (possibly empty) array.
                        return new X509Certificate[0];
                    }
                };
                // Default key managers and default SecureRandom; only trust handling is replaced.
                ctx.init(null, new TrustManager[] { tm }, null);
                SSLSocketFactory ssf = new SSLSocketFactory(ctx);

                // Replace the client's "https" scheme so port 443 uses the socket factory above.
                ClientConnectionManager ccm = httpclient.getConnectionManager();
                SchemeRegistry sr = ccm.getSchemeRegistry();
                sr.register(new Scheme("https", 443, ssf));

                HttpGet httpget = new HttpGet(
                        "https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=2016-08-23&from_station=BJP&to_station=TJP");

                System.out.println("Request URL:" + httpget.getURI());

                // Typed handler: yields the body as a String, so no unchecked cast is needed.
                ResponseHandler<String> responseHandler = new BasicResponseHandler();
                String responseBody = httpclient.execute(httpget, responseHandler);

                System.out.println(responseBody);

            } catch (NoSuchAlgorithmException e) {
                // The requested SSLContext protocol is not available in this JRE.
                e.printStackTrace();
            } catch (ClientProtocolException e) {
                // HTTP protocol-level failure reported by HttpClient.
                e.printStackTrace();
            } catch (IOException e) {
                // Connection problem or aborted transfer.
                e.printStackTrace();
            } catch (Exception ex) {
                // Anything else, e.g. a failure while initialising the SSLContext.
                ex.printStackTrace();
            } finally {
                // Release pooled connections and underlying sockets.
                httpclient.getConnectionManager().shutdown();
            }
        }
    }
    

      

  • 相关阅读:
    JBoss下配置数据源加密
    线程返回值的方式介绍
    @Async java 异步方法
    spring 源码下载地址
    springmvc maven 入门及页面拿不到controller的值 显示${message} el表达式不起作用
    mysql常用函数
    eclipse 开发常见问题集锦
    linux 常用命令及零散知识
    js一些注意事项
    linux 免密登录远程主机
  • 原文地址:https://www.cnblogs.com/zeze/p/5800087.html
Copyright © 2011-2022 走看看