zoukankan      html  css  js  c++  java
  • 【Java】通用版URLConnection 带cookie下载PDF等资源文件

    /**
    	 * Downloads a remote resource (e.g. a PDF) over a cookie-authenticated
    	 * {@link URLConnection} and writes the raw response bytes to
    	 * {@code savePath + fileName}.
    	 *
    	 * <p>Connect and read timeouts are both 30 seconds. The request carries a fixed
    	 * set of browser-like headers plus the {@code cookie} value held by the
    	 * enclosing class. I/O failures (including an unwritable target file) are
    	 * reported via {@code printStackTrace()} and are not propagated to the caller.
    	 *
    	 * @param urlStr   absolute URL of the resource to download
    	 * @param fileName name of the file to create
    	 * @param savePath directory prefix; concatenated directly with {@code fileName},
    	 *                 so it must already end with a path separator
    	 * @throws MalformedURLException if {@code urlStr} is not a valid URL
    	 */
    	public static void downloadNet(String urlStr, String fileName, String savePath) throws MalformedURLException {
    		URL url = new URL(urlStr);
    
    		try {
    			URLConnection conn = url.openConnection();
    			conn.setConnectTimeout(30000);
    			conn.setReadTimeout(30000);
    			conn.setRequestProperty("Host", "paper.cnstock.com");
    			conn.setRequestProperty("User-Agent",
    					"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0");
    			conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    			conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
    			// Mind the encoding: gzip is deliberately not advertised here, because the
    			// response bytes are written to disk as-is and a gzip-compressed body would
    			// show up as a garbled file.
    			conn.setRequestProperty("Accept-Encoding", "utf8, deflate");
    			conn.setRequestProperty("Content-Encoding", "utf8");
    			conn.setRequestProperty("Connection", "keep-alive");
    			conn.setRequestProperty("Upgrade-Insecure-Requests", "1");
    			conn.setRequestProperty("Cookie", cookie);
    			conn.setRequestProperty("Cache-Control", "max-age=0");
    			conn.setRequestProperty("Content-Type", "application/pdf");
    
    			// try-with-resources closes both streams even when read/write fails.
    			// The input stream is opened first so that no empty file is created
    			// when the connection itself cannot be established.
    			try (InputStream inStream = conn.getInputStream();
    					FileOutputStream fs = new FileOutputStream(savePath + fileName)) {
    				byte[] buffer = new byte[8192];
    				int byteread;
    				while ((byteread = inStream.read(buffer)) != -1) {
    					fs.write(buffer, 0, byteread);
    				}
    			}
    		} catch (IOException e) {
    			// Covers FileNotFoundException as well; best-effort download, so report and return.
    			e.printStackTrace();
    		}
    	}
    

      conn.setRequestProperty("Accept-Encoding", "utf8, deflate");//注意编码,gzip可能会乱码

      总结:

    采集到的文件出现异常时,先用记事本打开文件查看数据,并与实际页面作对比,判断是否为乱码。出现乱码很可能是请求头中的编码设置(Accept-Encoding)有问题:URLConnection 不会自动解压 gzip 响应,因此请求时不要声明接受 gzip。

  • 相关阅读:
    14_java之变量|参数|返回值|修饰符
    NYOJ 202 红黑树 (二叉树)
    NYOJ 138 找球号(二) (哈希)
    NYOJ 136 等式 (哈希)
    NYOJ 133 子序列 (离散化)
    NYOJ 129 树的判定 (并查集)
    NYOJ 117 求逆序数 (树状数组)
    NYOJ 93 汉诺塔 (数学)
    HDU 2050 折线分割平面 (数学)
    天梯赛L2-008 最长对称子串 (字符串处理)
  • 原文地址:https://www.cnblogs.com/zeze/p/7007019.html
Copyright © 2011-2022 走看看