zoukankan      html  css  js  c++  java
  • 网络爬虫之HTTPClient

    HTTPClient官网:http://hc.apache.org/httpcomponents-client-4.5.x/quickstart.html

    问题一:明明浏览器请求有数据,可使用HTTPClient输出却为空

    	import org.apache.http.*;
    	import org.apache.http.client.*;
    	import org.apache.http.client.methods.HttpGet;
    	import org.apache.http.impl.client.CloseableHttpClient;
    	import org.apache.http.impl.client.HttpClients;
    	import org.apache.http.util.EntityUtils;
    	import org.junit.Test;
    
    	/**
    	 * Sends a GET request to https://www.80s.tw with a default HttpClient and
    	 * prints the response body between two separator lines.
    	 *
    	 * <p>Any failure while executing the request is reported on stderr; in that
    	 * case the printed body is {@code null}.
    	 */
    	@Test
    	public void httpClientTest1() {
    		CloseableHttpClient httpclient = HttpClients.createDefault();
    		try {
    			String url = "https://www.80s.tw";
    			HttpGet httpGet = new HttpGet(url);
    			System.out.println("executing request " + httpGet.getURI());

    			// Turns a 2xx response into its body text; any other status is an error.
    			ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
    				public String handleResponse(final HttpResponse response) throws ClientProtocolException, IOException {
    					int status = response.getStatusLine().getStatusCode();
    					if (status >= 200 && status < 300) {
    						HttpEntity entity = response.getEntity();
    						// The charset declared by the response still wins; "UTF-8" is only
    						// the fallback (instead of ISO-8859-1) when none is declared.
    						return entity != null ? EntityUtils.toString(entity, "UTF-8") : null;
    					} else {
    						throw new ClientProtocolException("Unexpected response status: " + status);
    					}
    				}
    			};
    			String responseBody = null;
    			try {
    				responseBody = httpclient.execute(httpGet, responseHandler);
    			} catch (ClientProtocolException e) {
    				e.printStackTrace();
    			} catch (IOException e) {
    				// Do not swallow this: connection and SSL handshake failures arrive
    				// here, and hiding them makes the request look like it returned nothing.
    				e.printStackTrace();
    			}
    			System.out.println("-------------------------------------------");
    			System.out.println(responseBody);
    			System.out.println("-------------------------------------------");
    		} finally {
    			try {
    				httpclient.close();
    			} catch (IOException e) {
    				e.printStackTrace();
    			}
    		}
    	}
    

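      原因1:访问该网站可能需要证书——如果该网站的 HTTPS 证书不在 JVM 默认信任库中,SSL 握手会失败并抛出 IOException(例如 SSLHandshakeException),请求拿不到响应体,因此输出为 null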

      证书解决办法:http://www.cnblogs.com/zhumengke/p/8846912.html

    再次请求时导入我们下载的证书

    import javax.net.ssl.SSLContext;
    import org.apache.http.HttpEntity;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
    import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.ssl.SSLContexts;
    import org.apache.http.util.EntityUtils;
    import org.junit.Test;
    	/**
    	 * Sends a GET request to https://www.80s.tw using an SSL context whose trust
    	 * material is loaded from the local {@code jssecacerts} key store, then prints
    	 * the status line and the response body.
    	 *
    	 * @throws IllegalStateException if the trust store cannot be loaded
    	 */
    	@Test
    	public void httpTest() {
    		SSLContext sslcontext;
    		try {
    			File file = new File("D:/java/jre/lib/security", "jssecacerts");
    			sslcontext = SSLContexts.custom()
    					.loadTrustMaterial(file, "changeit".toCharArray(), new TrustSelfSignedStrategy()).build();
    		} catch (Exception e) {
    			// Fail fast: continuing with a null SSLContext only produces an unrelated
    			// error further down and hides the real cause.
    			throw new IllegalStateException("Unable to build SSL context from jssecacerts", e);
    		}
    		// null protocols / cipher suites = use whatever the JVM has enabled.
    		// Pinning the list to "TLSv1" breaks the handshake on current JDKs and servers.
    		SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslcontext, null, null,
    				SSLConnectionSocketFactory.getDefaultHostnameVerifier());
    		CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(sslsf).build();
    		try {
    			HttpGet httpget = new HttpGet("https://www.80s.tw");
    			System.out.println("Executing request " + httpget.getRequestLine());
    			CloseableHttpResponse response = httpclient.execute(httpget);
    			try {
    				HttpEntity entity = response.getEntity();
    				System.out.println("----------------------------------------");
    				System.out.println(response.getStatusLine());
    				// A response may carry no entity; EntityUtils.toString rejects null.
    				if (entity != null) {
    					// The declared charset still wins; "UTF-8" is only the fallback.
    					System.out.println(EntityUtils.toString(entity, "UTF-8"));
    				}
    				EntityUtils.consume(entity);
    			} finally {
    				response.close();
    			}
    		} catch (Exception e) {
    			e.printStackTrace();
    		} finally {
    			try {
    				httpclient.close();
    			} catch (IOException e) {
    				e.printStackTrace();
    			}
    		}
    	}
    

      

  • 相关阅读:
    nj07---npm
    nj06---包
    nj05---模块
    nj04---事件回调函数
    nj03---阻塞和线程
    nodejs02---demo
    nodejs简介
    【转贴】内存系列一:快速读懂内存条标签
    【转贴】4个你未必知道的内存小知识
    Linux上面mount 域控的目录 超时 然后提示 error的解决办法
  • 原文地址:https://www.cnblogs.com/zhumengke/p/8873706.html
Copyright © 2011-2022 走看看