zoukankan      html  css  js  c++  java
  • 【搜索引擎Jediael开发笔记2】使用HttpClient下载网页至本地文件

    本文使用HttpClient根据url进行网页下载。其中:

    (1)HttpClient的相关知识请参见 HttpClient基础教程

    (2)


    package org.ljh.search.downloadpage;
    
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintWriter;
    import java.io.Writer;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.StandardCopyOption;
    import java.util.Scanner;
    import java.util.regex.Pattern;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpStatus;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    
    //本类用于将指定url相应的网页下载至本地一个文件。
    public class PageDownloader {
    
    	public static void downloadPageByGetMethod(String url) throws IOException {
    
    		// 1、通过HttpGet获取到response对象
    		CloseableHttpClient httpClient = HttpClients.createDefault();
    		// 注意。必须要加上http://的前缀。否则会报:Target host is null异常。

    HttpGet httpGet = new HttpGet(url); CloseableHttpResponse response = httpClient.execute(httpGet); InputStream is = null; if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { try { // 2、获取response的entity。 HttpEntity entity = response.getEntity(); // 3、获取到InputStream对象。并对内容进行处理 is = entity.getContent(); String fileName = getFileName(url); saveToFile("D:\tmp\", fileName, is); } catch (ClientProtocolException e) { e.printStackTrace(); } finally { if (is != null) { is.close(); } if (response != null) { response.close(); } } } } //将输入流中的内容输出到path指定的路径,fileName指定的文件名称 private static void saveToFile(String path, String fileName, InputStream is) { Scanner sc = new Scanner(is); Writer os = null; try { os = new PrintWriter(path + fileName); while (sc.hasNext()) { os.write(sc.nextLine()); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (sc != null) { sc.close(); } if (os != null) { try{ os.flush(); os.close(); }catch(IOException e){ e.printStackTrace(); System.out.println("输出流关闭失败。"); } } } } // 将url中的特殊字符用下划线取代 private static String getFileName(String url) { url = url.substring(7); String fileName = url.replaceAll("[\?

    :*|<>"/]", "_") + ".html"; return fileName; } }




  • 相关阅读:
    使用Stream方式处理集合元素
    Consumer方法结合Lambda表达式的应用
    java-遍历字符串的两种方式:1.char charAt(int index);2.char[] toCharArray()
    java-成员变量与局部变量的测试
    java-统计字符串中各字符次数
    java-字符串的遍历和字符串数组的遍历
    java-String类的获取方法(indexOf();substring()等)
    java-模拟登陆练习
    java-String类中的各字符串判断(包括" "和null的区别)
    java-String类的常见面试题
  • 原文地址:https://www.cnblogs.com/blfbuaa/p/7059970.html
Copyright © 2011-2022 走看看