zoukankan      html  css  js  c++  java
  • httpClient download file(爬虫)

    package com.opensource.httpclient.bfs;

    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;

    import org.apache.commons.httpclient.HttpStatus;
    import org.apache.http.Header;
    import org.apache.http.HttpResponse;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.DefaultHttpClient;

    public class DownLoadFile
    {
       
        public String getFileNameByUrl(String url, String contentType)
        {
            url = url.substring(7);
           
            if (contentType.indexOf("html") != -1)
            {
                url = url.replaceAll("[\?/:*|<>"]", "_") + ".html";
                return url;
            }
            else
            {
                return url.replaceAll("[\?/:*|<>"]", "_") + "." + contentType.substring(contentType.lastIndexOf("/") + 1);
            }
        }
       
        public void saveToLocal(byte[] data, String filePath)
        {
            try
            {
                DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(filePath)));
                for (int i = 0; i < data.length; i++)
                    out.write(data[i]);
                out.flush();
                out.close();
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
       
        public String downloadFile(String url)
            throws ClientProtocolException, IOException
        {
            String filePath = null;
           
            HttpClient httpClient = new DefaultHttpClient();
           
            HttpGet get = new HttpGet(url);
           
            HttpResponse rsp = httpClient.execute(get);
           
            if (rsp.getStatusLine().getStatusCode() != HttpStatus.SC_OK)
            {
                System.err.println("Method failed: " + rsp.getStatusLine());
                filePath = null;
            }
            Header[] header = rsp.getHeaders("Content-Type");
            filePath = "D:\" + getFileNameByUrl(url, header[0].getValue());
           
            saveToLocal(rsp.toString().getBytes(), filePath);
           
            return filePath;
        }
       
        public static void main(String[] args)
            throws ClientProtocolException, IOException
        {
            DownLoadFile downLoadFile = new DownLoadFile();
           
            String temp = downLoadFile.downloadFile("http://www.huawei.com/cn/");
           
            System.out.println(temp);
        }
       
    }

  • 相关阅读:
    当Django模型迁移时,报No migrations to apply 问题时
    django--各个文件的含义
    django--创建项目
    1013. Battle Over Cities (25)
    1011. World Cup Betting (20)
    1009. Product of Polynomials (25)
    1007. Maximum Subsequence Sum (25)
    1006. Sign In and Sign Out (25)
    1008. Elevator (20)
    1004. Counting Leaves (30)
  • 原文地址:https://www.cnblogs.com/james1207/p/3285780.html
Copyright © 2011-2022 走看看