zoukankan      html  css  js  c++  java
  • httpClient download file(爬虫)

    package com.opensource.httpclient.bfs;

    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.regex.Pattern;

    import org.apache.commons.httpclient.HttpStatus;
    import org.apache.http.Header;
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpResponse;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.util.EntityUtils;

    public class DownLoadFile
    {
       
        public String getFileNameByUrl(String url, String contentType)
        {
            url = url.substring(7);
           
            if (contentType.indexOf("html") != -1)
            {
                url = url.replaceAll("[\?/:*|<>"]", "_") + ".html";
                return url;
            }
            else
            {
                return url.replaceAll("[\?/:*|<>"]", "_") + "." + contentType.substring(contentType.lastIndexOf("/") + 1);
            }
        }
       
        public void saveToLocal(byte[] data, String filePath)
        {
            try
            {
                DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(filePath)));
                for (int i = 0; i < data.length; i++)
                    out.write(data[i]);
                out.flush();
                out.close();
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
       
        public String downloadFile(String url)
            throws ClientProtocolException, IOException
        {
            String filePath = null;
           
            HttpClient httpClient = new DefaultHttpClient();
           
            HttpGet get = new HttpGet(url);
           
            HttpResponse rsp = httpClient.execute(get);
           
            if (rsp.getStatusLine().getStatusCode() != HttpStatus.SC_OK)
            {
                System.err.println("Method failed: " + rsp.getStatusLine());
                filePath = null;
            }
            Header[] header = rsp.getHeaders("Content-Type");
            filePath = "D:\" + getFileNameByUrl(url, header[0].getValue());
           
            saveToLocal(rsp.toString().getBytes(), filePath);
           
            return filePath;
        }
       
        public static void main(String[] args)
            throws ClientProtocolException, IOException
        {
            DownLoadFile downLoadFile = new DownLoadFile();
           
            String temp = downLoadFile.downloadFile("http://www.huawei.com/cn/");
           
            System.out.println(temp);
        }
       
    }

  • 相关阅读:
    「SDOI2018」物理实验
    「SDOI 2018」战略游戏
    「CodeChef Dec13 REALSET」 Petya and Sequence 循环卷积
    关于微信卡券投放code接口报错原因
    composer update maatwebsite/excel 之后 在linux机子上出现500解决方案
    开启mysql 服务【window】
    thinkphp在linux上部署环境(500情况)
    如何推广微信小程序到企业微信
    linux 阿里云远程连接mysql
    php7以上 不支持mcrypt_module_open方法问题【微信开放平台】
  • 原文地址:https://www.cnblogs.com/james1207/p/3285780.html
Copyright © 2011-2022 走看看