zoukankan      html  css  js  c++  java
  • 【Java】通用版URLConnection 带cookie下载PDF等资源文件

    /**
    	 * Downloads the resource at {@code urlStr} (for example a PDF) and writes
    	 * the response body, byte for byte, to the file {@code savePath + fileName}.
    	 *
    	 * <p>The request is sent with a fixed set of browser-like headers plus the
    	 * {@code cookie} value defined outside this method. Connect and read
    	 * timeouts are both 30000 ms.
    	 *
    	 * <p>I/O failures (connection errors, an unwritable target file, a failed
    	 * read or write) are not propagated: the stack trace is printed to standard
    	 * error and the method returns. Both streams are closed on every path.
    	 *
    	 * @param urlStr   URL of the resource to download
    	 * @param fileName name of the target file; appended to {@code savePath} as-is
    	 * @param savePath target directory prefix; it is concatenated directly with
    	 *                 {@code fileName}, so it must already end with a path
    	 *                 separator
    	 * @throws MalformedURLException if {@code urlStr} cannot be parsed as a URL
    	 */
    	public static void downloadNet(String urlStr, String fileName, String savePath) throws MalformedURLException {
    		// Parsed outside the try block so that a malformed URL reaches the caller.
    		URL url = new URL(urlStr);
    
    		try {
    			URLConnection conn = url.openConnection();
    			conn.setConnectTimeout(30000);
    			conn.setReadTimeout(30000);
    			conn.setRequestProperty("Host", "paper.cnstock.com");
    			conn.setRequestProperty("User-Agent",
    					"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0");
    			conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    			conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
    			// Mind the encoding: gzip is deliberately not advertised because it
    			// may produce garbled output.
    			// NOTE(review): "deflate" is still advertised, and the body below is
    			// written to disk exactly as received, with no decoding step in this
    			// method. Confirm the server never answers with a deflate-encoded body.
    			conn.setRequestProperty("Accept-Encoding", "utf8, deflate");
    			conn.setRequestProperty("Content-Encoding", "utf8");
    			conn.setRequestProperty("Connection", "keep-alive");
    			conn.setRequestProperty("Upgrade-Insecure-Requests", "1");
    			conn.setRequestProperty("Cookie", cookie);
    			conn.setRequestProperty("Cache-Control", "max-age=0");
    			conn.setRequestProperty("Content-Type", "application/pdf");
    
    			// The response stream is opened first, so a failed request does not
    			// create the target file. try-with-resources closes both streams
    			// even when a read or write throws.
    			try (InputStream inStream = conn.getInputStream();
    					FileOutputStream fs = new FileOutputStream(savePath + fileName)) {
    				byte[] buffer = new byte[1024];
    				int byteread;
    				while ((byteread = inStream.read(buffer)) != -1) {
    					fs.write(buffer, 0, byteread);
    				}
    			}
    		} catch (IOException e) {
    			// Best-effort download: report and return. This also covers
    			// FileNotFoundException, which is a subclass of IOException.
    			e.printStackTrace();
    		}
    	}
    

      conn.setRequestProperty("Accept-Encoding", "utf8, deflate");//注意编码,gzip可能会乱码

      总结:

    采集到的文件出现异常时,可以用记事本打开文件查看数据,并与实际页面作对比,判断是否为乱码。出现乱码时,很可能是请求头中的编码设置(如 Accept-Encoding)有问题。

  • 相关阅读:
    图的存储结构(邻接矩阵) 数据结构和算法56
    图的存储结构(邻接矩阵)
    图的存储结构(邻接矩阵)
    图的存储结构(邻接矩阵)
    图的存储结构(邻接矩阵)
    图的存储结构(邻接矩阵)
    图的存储结构(邻接矩阵) 数据结构和算法56
    .net 怎么获取文件夹的图片
    .net HTML编码解析
    WebService中方法的重载
  • 原文地址:https://www.cnblogs.com/zeze/p/7007019.html
Copyright © 2011-2022 走看看