zoukankan      html  css  js  c++  java
  • 【网络爬虫】HttpClient 4.x 中使用 HTTPS 采集 12306 网站的方法

    使用 HttpClient 请求 HTTPS 的示例:

    package train;
    
    import java.io.IOException;
    import java.security.NoSuchAlgorithmException;
    import java.security.cert.CertificateException;
    import java.security.cert.X509Certificate;
    
    import javax.net.ssl.SSLContext;
    import javax.net.ssl.TrustManager;
    import javax.net.ssl.X509TrustManager;
    
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.ResponseHandler;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.conn.ClientConnectionManager;
    import org.apache.http.conn.scheme.Scheme;
    import org.apache.http.conn.scheme.SchemeRegistry;
    import org.apache.http.conn.ssl.SSLSocketFactory;
    import org.apache.http.impl.client.BasicResponseHandler;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.params.HttpParams;
    
    /**
     * Minimal example that requests a 12306 train-query URL over HTTPS with the
     * Apache HttpClient 4.x legacy API and prints the response body to standard
     * output.
     *
     * <p><b>Security warning:</b> the HTTPS scheme registered here uses a trust
     * manager that accepts every certificate chain, so the server is not
     * authenticated and the connection is open to man-in-the-middle attacks. This
     * is only acceptable for experiments; real code should trust the server
     * certificate explicitly through a trust store.
     */
    public class train {

        /** URL requested by {@link #main(String[])}. */
        private static final String QUERY_URL =
                "https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=2016-08-23&from_station=BJP&to_station=TJP";

        /**
         * Sends a GET request to {@link #QUERY_URL} and prints the response body.
         * Failures are reported on standard error; the connection manager is always
         * shut down before the method returns.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            HttpClient httpclient = new DefaultHttpClient();
            try {
                // Replace the default "https" scheme with one backed by the
                // trust-all socket factory.
                ClientConnectionManager ccm = httpclient.getConnectionManager();
                SchemeRegistry sr = ccm.getSchemeRegistry();
                sr.register(new Scheme("https", 443, createTrustAllSocketFactory()));

                HttpGet httpget = new HttpGet(QUERY_URL);
                System.out.println("Request URL:" + httpget.getURI());

                // The typed handler returns the body as a String, so no raw type
                // or cast is needed.
                ResponseHandler<String> responseHandler = new BasicResponseHandler();
                String responseBody = httpclient.execute(httpget, responseHandler);

                System.out.println(responseBody);
            } catch (NoSuchAlgorithmException e) {
                System.err.println("TLS is not available in this JRE");
                e.printStackTrace();
            } catch (java.security.KeyManagementException e) {
                System.err.println("Failed to initialise the SSL context");
                e.printStackTrace();
            } catch (ClientProtocolException e) {
                System.err.println("HTTP protocol error");
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("I/O error while executing the request");
                e.printStackTrace();
            } catch (Exception ex) {
                // Top-level boundary: report anything unexpected instead of
                // letting it escape main, as the original example did.
                ex.printStackTrace();
            } finally {
                // Release the pooled connections held by the client.
                httpclient.getConnectionManager().shutdown();
            }
        }

        /**
         * Builds a socket factory whose SSL context trusts every certificate.
         *
         * <p>INSECURE: certificate validation is disabled on purpose for this
         * demo. Do not reuse in production code.
         *
         * @return socket factory backed by a trust-all SSL context
         * @throws NoSuchAlgorithmException if no provider supports "TLS"
         * @throws java.security.KeyManagementException if the context cannot be
         *         initialised
         */
        private static SSLSocketFactory createTrustAllSocketFactory()
                throws NoSuchAlgorithmException, java.security.KeyManagementException {
            SSLContext ctx = SSLContext.getInstance("TLS");

            X509TrustManager trustAll = new X509TrustManager() {

                @Override
                public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // Intentionally empty: every client chain is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // Intentionally empty: every server chain is accepted (insecure).
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    // The interface contract asks for a non-null, possibly empty array.
                    return new X509Certificate[0];
                }
            };

            // Null key managers and null SecureRandom select the defaults.
            ctx.init(null, new TrustManager[] { trustAll }, null);
            return new SSLSocketFactory(ctx);
        }
    }
    

      

  • 相关阅读:
    java并发编程-Executor框架 + Callable + Future
    Executors Future Callable 使用场景实例
    大数据云平台Greenplum:多租户篇
    Kafka集成SparkStreaming
    CDH集群安装出现问题参考
    Cloudera Manager卸载笔记
    Hive去除重复数据操作
    032 搭建搜索微服务01----向ElasticSearch中导入数据--通过Feign实现微服务之间的相互调用
    SpringBoot工程常见报错汇总
    030 ElasticSearch----全文检索技术05---基础知识详解03-聚合
  • 原文地址:https://www.cnblogs.com/zeze/p/5800087.html
Copyright © 2011-2022 走看看