zoukankan      html  css  js  c++  java
  • 【搜索引擎Jediael开发笔记2】使用HttpClient下载网页至本地文件 分类: C_OHTERS 2014-05-19 15:07 1108人阅读 评论(0) 收藏

    本文使用HttpClient根据url进行网页下载。其中

    (1)HttpClient的相关知识请参见 HttpClient基础教程

    (2)完整的示例代码如下:


    package org.ljh.search.downloadpage;
    
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintWriter;
    import java.io.Writer;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.StandardCopyOption;
    import java.util.Scanner;
    import java.util.regex.Pattern;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpStatus;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    
    //本类用于将指定url对应的网页下载至本地一个文件。
    public class PageDownloader {
    
    	public static void downloadPageByGetMethod(String url) throws IOException {
    
    		// 1、通过HttpGet获取到response对象
    		CloseableHttpClient httpClient = HttpClients.createDefault();
    		// 注意,必需要加上http://的前缀,否则会报:Target host is null异常。
    		HttpGet httpGet = new HttpGet(url);
    		CloseableHttpResponse response = httpClient.execute(httpGet);
    
    		InputStream is = null;
    		if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
    			try {
    				// 2、获取response的entity。
    				HttpEntity entity = response.getEntity();
    
    				// 3、获取到InputStream对象,并对内容进行处理
    				is = entity.getContent();
    
    				String fileName = getFileName(url);
    				saveToFile("D:\tmp\", fileName, is);
    			} catch (ClientProtocolException e) {
    				e.printStackTrace();
    			} finally {
    
    				if (is != null) {
    					is.close();
    				}
    				if (response != null) {
    					response.close();
    				}
    			}
    		}
    	}
    
    	//将输入流中的内容输出到path指定的路径,fileName指定的文件名
    	private static void saveToFile(String path, String fileName, InputStream is) {
    		Scanner sc = new Scanner(is);
    		Writer os = null;
    		try {
    			os = new PrintWriter(path + fileName);
    			while (sc.hasNext()) {
    				os.write(sc.nextLine());
    			}
    		} catch (FileNotFoundException e) {
    			e.printStackTrace();
    		} catch (IOException e) {
    			e.printStackTrace();
    		} finally {
    			if (sc != null) {
    				sc.close();
    			}
    			if (os != null) {
    				try{
    				os.flush();
    				os.close();
    				}catch(IOException e){
    					e.printStackTrace();
    					System.out.println("输出流关闭失败!");
    				}
    			}
    		}
    	}
    
    	// 将url中的特殊字符用下划线代替
    	private static String getFileName(String url) {
    		url = url.substring(7);
    		String fileName = url.replaceAll("[\?:*|<>"/]", "_") + ".html";
    		return fileName;
    	}
    
    }
    



    版权声明:本文为博主原创文章,未经博主允许不得转载。

  • 相关阅读:
    BZOJ3562 : [SHOI2014]神奇化合物
    BZOJ3559 : [Ctsc2014]图的分割
    BZOJ3551 : [ONTAK2010]Peaks加强版
    BZOJ3542:DZY Loves March
    李洪强iOS开发之
    iOS学习之iOS沙盒(sandbox)机制和文件操作1
    iOS学习之iOS沙盒(sandbox)机制和文件操作
    stringByAppendingPathComponent和stringByAppendingString 的区别
    iOS开发:Toast for iPhone
    深度解析开发项目之 01
  • 原文地址:https://www.cnblogs.com/lujinhong2/p/4637336.html
Copyright © 2011-2022 走看看