zoukankan      html  css  js  c++  java
  • 【搜索引擎Jediael开发笔记2】使用HttpClient下载网页至本地文件

    本文使用HttpClient根据url下载网页。其中：

    (1)HttpClient的相关知识请参见 HttpClient基础教程

    (2)


    package org.ljh.search.downloadpage;
    
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintWriter;
    import java.io.Writer;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.StandardCopyOption;
    import java.util.Scanner;
    import java.util.regex.Pattern;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpStatus;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    
    //本类用于将指定url相应的网页下载至本地一个文件。
    public class PageDownloader {
    
    	public static void downloadPageByGetMethod(String url) throws IOException {
    
    		// 1、通过HttpGet获取到response对象
    		CloseableHttpClient httpClient = HttpClients.createDefault();
    		// 注意。必须要加上http://的前缀。否则会报:Target host is null异常。

    HttpGet httpGet = new HttpGet(url); CloseableHttpResponse response = httpClient.execute(httpGet); InputStream is = null; if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { try { // 2、获取response的entity。 HttpEntity entity = response.getEntity(); // 3、获取到InputStream对象。并对内容进行处理 is = entity.getContent(); String fileName = getFileName(url); saveToFile("D:\tmp\", fileName, is); } catch (ClientProtocolException e) { e.printStackTrace(); } finally { if (is != null) { is.close(); } if (response != null) { response.close(); } } } } //将输入流中的内容输出到path指定的路径,fileName指定的文件名称 private static void saveToFile(String path, String fileName, InputStream is) { Scanner sc = new Scanner(is); Writer os = null; try { os = new PrintWriter(path + fileName); while (sc.hasNext()) { os.write(sc.nextLine()); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (sc != null) { sc.close(); } if (os != null) { try{ os.flush(); os.close(); }catch(IOException e){ e.printStackTrace(); System.out.println("输出流关闭失败。"); } } } } // 将url中的特殊字符用下划线取代 private static String getFileName(String url) { url = url.substring(7); String fileName = url.replaceAll("[\?

    :*|<>"/]", "_") + ".html"; return fileName; } }




  • 相关阅读:
    hdu1257
    P6198 [EER1]单调栈 题解(分治+构造)
    P3193 [HNOI2008]GT考试 题解(kmp+矩阵快速幂)
    Product of GCDs 题解(欧拉降幂+贡献)
    P2501 [HAOI2006]数字序列 题解(dp+构造)
    欧拉降幂
    I love max and multiply 题解(二进制dp)
    永不言弃 题解(线段树维护hash+二分)
    Problem D. Ice Cream Tower 题解(二分+贪心)
    E. Kefa and Watch 题解(线段树维护hash+循环节结论)
  • 原文地址:https://www.cnblogs.com/blfbuaa/p/7059970.html
Copyright © 2011-2022 走看看