zoukankan      html  css  js  c++  java
  • 使用 POST 方法模拟登录爬取网页

    最近在写爬虫,遇到的一个问题是如何使用 POST 方法模拟登录来爬取网页。下面是极简版的代码:

    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.io.PrintWriter;
    
    import java.net.HttpURLConnection;
    import java.net.URL;
    
    import java.nio.charset.StandardCharsets;
    
    import java.util.HashMap;
    
    /**
     * Minimal example of fetching a page by replaying a captured, logged-in POST request.
     *
     * <p>The four string constants below are placeholders: fill them in with the target URL and
     * the User-Agent, Cookie and request body captured from a real browser session.
     */
    public class test {

        // URL the POST request is sent to.
        private static final String POST_URL = "";

        // User-Agent header value, imitating a Chrome browser request.
        private static final String USER_AGENT = "";

        // Cookie captured from a request to POST_URL made after logging in to the site.
        private static final String COOKIE = "";

        // Request body captured from a request to POST_URL made after logging in to the site.
        private static final String REQUEST_DATA = "";

        // Upper bound on attempts, so a server that never answers 200 cannot cause an endless loop.
        private static final int MAX_ATTEMPTS = 5;

        // Pause between attempts, so retries do not hammer the server.
        private static final long RETRY_DELAY_MILLIS = 1000L;

        // Timeouts, so a stalled server cannot block the program forever.
        private static final int CONNECT_TIMEOUT_MILLIS = 10000;
        private static final int READ_TIMEOUT_MILLIS = 30000;

        /**
         * Sends the captured POST request, retrying until the server answers 200, then prints the
         * response body to standard output.
         *
         * @param args unused
         * @throws IOException if the server has not answered 200 after {@code MAX_ATTEMPTS} attempts
         * @throws Exception if the URL is malformed, the connection fails, or the wait is interrupted
         */
        public static void main(String[] args) throws Exception {
            HashMap<String, String> result = postCapture(REQUEST_DATA);
            int attempts = 1;

            // Constant-first equals also tolerates a missing "responseCode" entry.
            while (!"200".equals(result.get("responseCode"))) {
                if (attempts >= MAX_ATTEMPTS) {
                    throw new IOException("POST to " + POST_URL + " failed after " + attempts
                            + " attempts; last response code: " + result.get("responseCode"));
                }
                Thread.sleep(RETRY_DELAY_MILLIS);
                result = postCapture(REQUEST_DATA);
                attempts++;
            }

            // Print the fetched page.
            System.out.println(result.get("value"));
        }

        /**
         * Performs one POST request against {@code POST_URL} with the configured User-Agent and
         * Cookie headers.
         *
         * @param requestData request body, sent verbatim as UTF-8
         * @return a map with two entries: {@code "responseCode"} (the HTTP status as a decimal
         *         string) and {@code "value"} (the response body decoded as UTF-8, or an empty
         *         string when the status is not 200)
         * @throws Exception if the URL is malformed or any I/O step fails
         */
        private static HashMap<String, String> postCapture(String requestData) throws Exception {
            URL url = new URL(POST_URL);
            HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
            try {
                httpConn.setDoInput(true); // the response body will be read
                httpConn.setDoOutput(true); // a request body will be written
                httpConn.setUseCaches(false); // always hit the server, never a cache
                httpConn.setRequestMethod("POST");
                httpConn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
                httpConn.setReadTimeout(READ_TIMEOUT_MILLIS);
                httpConn.setRequestProperty("User-Agent", USER_AGENT);
                httpConn.setRequestProperty("Cookie", COOKIE);

                // A plain Writer is used instead of PrintWriter.println: println would append a
                // platform line separator to the captured body and silently swallow write errors.
                try (OutputStreamWriter out =
                        new OutputStreamWriter(httpConn.getOutputStream(), StandardCharsets.UTF_8)) {
                    out.write(requestData);
                }

                int responseCode = httpConn.getResponseCode();
                StringBuilder body = new StringBuilder();
                if (responseCode == HttpURLConnection.HTTP_OK) {
                    try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(httpConn.getInputStream(), StandardCharsets.UTF_8))) {
                        // Copy characters verbatim so line breaks in the page are preserved.
                        char[] chunk = new char[4096];
                        int read;
                        while ((read = reader.read(chunk)) != -1) {
                            body.append(chunk, 0, read);
                        }
                    }
                }

                HashMap<String, String> map = new HashMap<>();
                map.put("responseCode", String.valueOf(responseCode));
                map.put("value", body.toString());
                return map;
            } finally {
                // Release the connection on every path, including non-200 responses and failures.
                httpConn.disconnect();
            }
        }

    }
    
    

    原文地址:
    http://wangxin123.com/2016/12/19/使用Post方法模拟登陆爬取网页/

  • 相关阅读:
    周末之个人杂想(十三)
    PowerTip of the DaySorting Multiple Properties
    PowerTip of the DayCreate Remoting Solutions
    PowerTip of the DayAdd Help to Your Functions
    PowerTip of the DayAcessing Function Parameters by Type
    PowerTip of the DayReplace Text in Files
    PowerTip of the DayAdding Extra Information
    PowerTip of the DayPrinting Results
    Win7下IIS 7.5配置SSAS(2008)远程访问
    PowerTip of the DayOpening Current Folder in Explorer
  • 原文地址:https://www.cnblogs.com/wangxin37/p/6398743.html
Copyright © 2011-2022 走看看