zoukankan      html  css  js  c++  java
  • 新浪微博模拟登陆+数据抓取(java实现)

     模拟登陆部分实现:

     
    package token.exe;
    
    import java.math.BigInteger;
    import java.nio.charset.Charset;
    import java.security.SecureRandom;
    import java.util.Random;
    
    import org.apache.commons.codec.binary.Base64;
    
    public class WeiboEncoder {
    
        private static BigInteger n = null;
        private static BigInteger e = null;
        
        /**
         * 使用Base64加密用户名(su的获取)
         * @param account
         * @return
         */
        @SuppressWarnings("deprecation")
        public static String encodeAccount(String account){        
            return new String(Base64.encodeBase64(account.getBytes()));
        }
        
        /**
         * 使用RSAEncrypt对用户密码进行加密(sp的获取)
         * @param pwd
         * @param nStr
         * @param eStr
         * @return
         */
        public static String RSAEncrypt(String pwd, String nStr, String eStr){
            n = new BigInteger(nStr,16);
            e = new BigInteger(eStr,16);
            
            BigInteger r = RSADoPublic(pkcs1pad2(pwd,(n.bitLength()+7)>>3));
            String sp = r.toString(16);
            if((sp.length()&1) != 0 ) 
                sp = "0" + sp;
            return sp;
        }
        
        private static BigInteger RSADoPublic(BigInteger x){
             return x.modPow(e, n);
        }
        
        private static BigInteger pkcs1pad2(String s, int n){
            if(n < s.length() + 11) { // TODO: fix for utf-8
                System.err.println("Message too long for RSA");
                return null;
              }
            byte[] ba = new byte[n];
            int i = s.length()-1;
            while(i >= 0 && n > 0) {
                int c = s.codePointAt(i--);
                if(c < 128) { // encode using utf-8
                  ba[--n] = new Byte(String.valueOf(c));
                }
                else if((c > 127) && (c < 2048)) {
                  ba[--n] = new Byte(String.valueOf((c & 63) | 128));
                  ba[--n] = new Byte(String.valueOf((c >> 6) | 192));
                }
                else {
                  ba[--n] = new Byte(String.valueOf((c & 63) | 128));
                  ba[--n] = new Byte(String.valueOf(((c >> 6) & 63) | 128));
                  ba[--n] = new Byte(String.valueOf((c >> 12) | 224));
                }
              }
            ba[--n] = new Byte("0");
            
            byte[] temp = new byte[1];
            Random rdm = new Random(47L);
            
            while(n > 2) { // random non-zero pad
                temp[0] = new Byte("0");
                while(temp[0] == 0) 
                    rdm.nextBytes(temp);
                ba[--n] = temp[0];
            }
            ba[--n] = 2;
            ba[--n] = 0;
            
            return new BigInteger(ba);
        }
         
        
    }
    
     参数实体:
    
     
    
    package token.def;
    
    import java.io.Serializable;
    
    /**
     * Serializable bean that carries the state of one login attempt between calls.
     *
     * <p>All string properties start out as {@code null}; {@code isLogin} starts out
     * as {@code true}.
     */
    public class LoginParams implements Serializable {

        private static final long serialVersionUID = -5775728968372860382L;

        // Field names are part of the serialized form and are therefore left untouched.
        private String pcid;
        private String servertime;
        private String nonce;
        private String rsakv;
        private String imgUrl;
        private String sp;
        private String code;
        private boolean isLogin = true;

        /** @return the {@code pcid} value, or {@code null} if never set */
        public String getPcid() {
            return this.pcid;
        }

        /** @param pcid the {@code pcid} value to store */
        public void setPcid(String pcid) {
            this.pcid = pcid;
        }

        /** @return the {@code servertime} value, or {@code null} if never set */
        public String getServertime() {
            return this.servertime;
        }

        /** @param servertime the {@code servertime} value to store */
        public void setServertime(String servertime) {
            this.servertime = servertime;
        }

        /** @return the {@code nonce} value, or {@code null} if never set */
        public String getNonce() {
            return this.nonce;
        }

        /** @param nonce the {@code nonce} value to store */
        public void setNonce(String nonce) {
            this.nonce = nonce;
        }

        /** @return the {@code rsakv} value, or {@code null} if never set */
        public String getRsakv() {
            return this.rsakv;
        }

        /** @param rsakv the {@code rsakv} value to store */
        public void setRsakv(String rsakv) {
            this.rsakv = rsakv;
        }

        /** @return the stored image URL, or {@code null} if never set */
        public String getImgUrl() {
            return this.imgUrl;
        }

        /** @param imgUrl the image URL to store */
        public void setImgUrl(String imgUrl) {
            this.imgUrl = imgUrl;
        }

        /** @return the {@code sp} value, or {@code null} if never set */
        public String getSp() {
            return this.sp;
        }

        /** @param sp the {@code sp} value to store */
        public void setSp(String sp) {
            this.sp = sp;
        }

        /** @return the {@code code} value, or {@code null} if never set */
        public String getCode() {
            return this.code;
        }

        /** @param code the {@code code} value to store */
        public void setCode(String code) {
            this.code = code;
        }

        /** @return the login flag; {@code true} unless it was explicitly cleared */
        public boolean isLogin() {
            return this.isLogin;
        }

        /** @param isLogin new value of the login flag */
        public void setLogin(boolean isLogin) {
            this.isLogin = isLogin;
        }

        /** Lists every property as {@code name=value} inside {@code LoginParams [...]}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("LoginParams [");
            text.append("pcid=").append(this.pcid);
            text.append(", servertime=").append(this.servertime);
            text.append(", nonce=").append(this.nonce);
            text.append(", rsakv=").append(this.rsakv);
            text.append(", imgUrl=").append(this.imgUrl);
            text.append(", sp=").append(this.sp);
            text.append(", code=").append(this.code);
            text.append(", isLogin=").append(this.isLogin);
            return text.append("]").toString();
        }

    }
    
    登陆部分实现:

    package token.exe;

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.net.URLEncoder;
    import java.security.KeyManagementException;
    import java.security.NoSuchAlgorithmException;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Properties;
    import java.util.Scanner;

    import org.apache.commons.httpclient.Header;
    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.HttpException;
    import org.apache.commons.httpclient.HttpStatus;
    import org.apache.commons.httpclient.HttpVersion;
    import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
    import org.apache.commons.httpclient.NameValuePair;
    import org.apache.commons.httpclient.cookie.CookiePolicy;
    import org.apache.commons.httpclient.methods.GetMethod;
    import org.apache.commons.httpclient.methods.PostMethod;
    import org.apache.commons.httpclient.params.HttpClientParams;
    import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
    import org.apache.commons.httpclient.protocol.Protocol;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    import token.SinaWeiboOAuth;
    import token.def.LoginParams;
    import weibo4j.model.MySSLSocketFactory;

    /**
     * Simulates the Sina Weibo web login: prelogin, SSO login (with or without a
     * captcha) and the authorize form posts that yield an authorization code.
     *
     * <p>The authorize endpoint, redirect URI and client id are read from
     * {@code config.properties} on the classpath (keys {@code authorizeURL},
     * {@code redirect_URI}, {@code client_ID}).
     */
    public class WeiboLoginer {

        /** Endpoint that serves the captcha image. */
        private static final String PIN_URL = "https://login.sina.com.cn/cgi/pin.php?";

        /** Endpoint of the SSO login request. */
        private static final String LOGIN_URL = "https://login.sina.com.cn/sso/login.php?";

        private HttpClient httpClient;

        /**
         * Creates the shared {@link HttpClient}: 3 second connect and socket timeouts,
         * browser-compatible cookies, a set of default request headers and a custom
         * socket factory for https.
         */
        public WeiboLoginer() {
            // Connection settings.
            MultiThreadedHttpConnectionManager httpManager = new MultiThreadedHttpConnectionManager();
            HttpConnectionManagerParams connectParams = httpManager.getParams();
            connectParams.setConnectionTimeout(3000);
            connectParams.setDefaultMaxConnectionsPerHost(100);
            connectParams.setSoTimeout(3000);

            // Client settings.
            HttpClientParams httpParams = new HttpClientParams();
            httpParams.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
            httpParams.setVersion(HttpVersion.HTTP_1_1);

            // Headers sent with every request.
            List<Header> headers = new ArrayList<Header>();
            headers.add(new Header("Content-Type", "application/x-www-form-urlencoded"));
            headers.add(new Header("Host", "login.sina.com.cn"));
            headers.add(new Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"));
            headers.add(new Header("API-RemoteIP", "192.168.0.1")); // spoofed IP for Sina's check
            headers.add(new Header("X-Forwarded-For", "192.168.0.1")); // spoofed real IP
            headers.add(new Header("CLIENT-IP", "192.168.0.1")); // spoofed client IP

            httpClient = new HttpClient(httpParams, httpManager);
            httpClient.getHostConfiguration().getParams().setParameter("http.default-headers", headers);

            // Registers the https handler globally (static registry), not just for this client.
            Protocol protocol = new Protocol("https", new MySSLSocketFactory(), 443);
            Protocol.registerProtocol("https", protocol);

            // A proxy can be set here via httpClient.getHostConfiguration().setProxy(host, port).
        }

        /**
         * Logs in and obtains the authorization code. When the server asks for a
         * captcha, the returned object carries the captcha image URL and the values
         * needed to continue with {@link #doLoginByPin} instead of a code.
         *
         * @param username account name
         * @param password clear-text password
         * @return parameters with {@code code} set on success; with {@code imgUrl},
         *         {@code pcid}, {@code nonce}, {@code servertime}, {@code rsakv} and
         *         {@code sp} set when a captcha is required; with neither when the
         *         authorize step failed with an I/O error
         * @throws IllegalStateException if the prelogin or login request fails or the
         *         prelogin response has no public key
         */
        public LoginParams doLogin(String username, String password) {
            Properties properties = initProperties();
            String base64UserCount = WeiboEncoder.encodeAccount(username);
            LoginParams loginParams = new LoginParams();

            HashMap<String, String> pubkeyMap;
            try {
                pubkeyMap = pubKeyMap(base64UserCount);
            } catch (IOException e) {
                // Nothing below can work without the prelogin data; fail with the cause attached.
                throw new IllegalStateException("Weibo prelogin request failed", e);
            }
            String pubkey = pubkeyMap.get("pubkey");
            if (pubkey == null) {
                throw new IllegalStateException("Weibo prelogin response contains no pubkey");
            }
            String sp = WeiboEncoder.RSAEncrypt(password, pubkey, "10001");

            String imgUrl = getPin(pubkeyMap);
            if (imgUrl != null) {
                fillCaptchaParams(loginParams, pubkeyMap, imgUrl, sp);
                return loginParams;
            }

            HashMap<String, String> ticketMap;
            try {
                ticketMap = getTicket(base64UserCount, sp, pubkeyMap);
            } catch (IOException e) {
                throw new IllegalStateException("Weibo login request failed", e);
            }

            // The login itself may still demand a captcha (account is a Sina registered e-mail).
            String vcUrl = isHasPinAgain(pubkeyMap, ticketMap);
            if (vcUrl != null) {
                fillCaptchaParams(loginParams, pubkeyMap, vcUrl, sp);
                return loginParams;
            }

            try {
                String code = authorize(ticketMap.get("ticket"),
                        properties.getProperty("authorizeURL"),
                        properties.getProperty("redirect_URI"),
                        properties.getProperty("client_ID"),
                        username, ticketMap.get("uid"));
                loginParams.setCode(code);
            } catch (IOException e) {
                // Best effort: the caller sees a result without a code.
                e.printStackTrace();
            }
            return loginParams;
        }

        /** Copies everything a later captcha login needs into {@code target}. */
        private static void fillCaptchaParams(LoginParams target, HashMap<String, String> pubkeyMap,
                String captchaUrl, String sp) {
            target.setPcid(pubkeyMap.get("pcid"));
            target.setNonce(pubkeyMap.get("nonce"));
            target.setServertime(pubkeyMap.get("servertime"));
            target.setRsakv(pubkeyMap.get("rsakv"));
            target.setImgUrl(captchaUrl);
            target.setSp(sp);
        }

        /**
         * Logs in with a captcha answer.
         *
         * @param username   account name
         * @param sp         encrypted password as returned by {@link #doLogin}
         * @param pin        captcha text entered by the user
         * @param pcid       value from the earlier {@link #doLogin} result
         * @param servertime value from the earlier {@link #doLogin} result
         * @param nonce      value from the earlier {@link #doLogin} result
         * @param rsakv      value from the earlier {@link #doLogin} result
         * @return parameters with {@code code} set on success; with {@code isLogin()}
         *         {@code false} when the server rejected the captcha; with neither
         *         when any step threw (the exception is printed)
         */
        public LoginParams doLoginByPin(String username, String sp, String pin, String pcid,
                String servertime, String nonce, String rsakv) {
            Properties properties = initProperties();
            String base64UserCount = WeiboEncoder.encodeAccount(username);
            LoginParams params = new LoginParams();
            try {
                HashMap<String, String> ticketMap =
                        getTicket(base64UserCount, sp, pin, pcid, servertime, nonce, rsakv);
                // Means "The verification code entered is incorrect".
                String reply = "\u8f93\u5165\u7684\u9a8c\u8bc1\u7801\u4e0d\u6b63\u786e";
                if (reply.equals(ticketMap.get("reason"))) {
                    params.setLogin(false);
                    return params;
                }
                String code = authorize(ticketMap.get("ticket"),
                        properties.getProperty("authorizeURL"),
                        properties.getProperty("redirect_URI"),
                        properties.getProperty("client_ID"),
                        username, ticketMap.get("uid"));
                params.setCode(code);
            } catch (Exception e) {
                // Best effort: the caller sees a result without a code.
                e.printStackTrace();
            }
            return params;
        }

        /**
         * Posts the login form to the authorize endpoint.
         *
         * @param ticket       ticket from the SSO login
         * @param authorizeURL authorize endpoint
         * @param redirectURI  callback address
         * @param clientId     app key
         * @param username     account name
         * @param uid          user id from the SSO login
         * @return the authorization code; {@code null} if the response is neither 200 nor 302
         * @throws IOException if a request fails or a response lacks the expected data
         */
        private String authorize(String ticket, String authorizeURL, String redirectURI,
                String clientId, String username, String uid) throws IOException {
            String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri=" + redirectURI
                    + "&response_type=code&forcelogin=true";
            String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
                    + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

            PostMethod post = new PostMethod(authorizeURL);
            byte[] htmlDatas;
            try {
                // Mimics the token request link; without it the callback address comes back empty.
                post.setRequestHeader("Referer", url);
                post.setRequestBody(new NameValuePair[] {
                    new NameValuePair("action", "login"),
                    new NameValuePair("userId", username),
                    new NameValuePair("ticket", ticket),
                    new NameValuePair("response_type", "code"),
                    new NameValuePair("redirect_uri", redirectURI),
                    new NameValuePair("client_id", clientId),
                    new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
                });
                int status = httpClient.executeMethod(post);
                if (status == 302) {
                    return codeFromRedirect(post);
                }
                if (status != HttpStatus.SC_OK) {
                    return null;
                }
                htmlDatas = post.getResponseBody();
            } finally {
                post.releaseConnection();
            }
            // A 200 response is the confirmation page; submit it as well.
            return authorizeAgain(htmlDatas, ticket, authorizeURL, redirectURI, clientId, username, uid);
        }

        /**
         * Submits the authorization form a second time, using the {@code verifyToken}
         * found in the page returned by the first submission.
         *
         * @param htmlDatas page returned by the first authorize request
         * @return the authorization code; {@code null} if the response is not a 302
         * @throws HttpException if the page has no {@code verifyToken} field or the
         *         redirect has no Location header
         * @throws IOException if the request fails
         */
        private String authorizeAgain(byte[] htmlDatas, String ticket, String authorizeURL,
                String redirectURI, String clientId, String username, String uid)
                throws HttpException, IOException {
            if (htmlDatas == null) {
                throw new HttpException("Authorize page is empty");
            }
            Document doc = Jsoup.parse(new String(htmlDatas, "utf-8"));
            Element verifyTokenElement = doc.select("input[name=verifyToken]").first();
            if (verifyTokenElement == null) {
                throw new HttpException("Authorize page contains no verifyToken input");
            }
            String verifyToken = verifyTokenElement.attr("value");

            String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri=" + redirectURI
                    + "&response_type=code&forcelogin=true";
            String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
                    + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

            PostMethod post = new PostMethod(authorizeURL);
            try {
                // Mimics the token request link; without it the callback address comes back empty.
                post.setRequestHeader("Referer", authorizeURL);
                post.setRequestBody(new NameValuePair[] {
                    new NameValuePair("action", "authorize"),
                    new NameValuePair("uid", uid),
                    new NameValuePair("url", url),
                    new NameValuePair("response_type", "code"),
                    new NameValuePair("redirect_uri", redirectURI),
                    new NameValuePair("client_id", clientId),
                    new NameValuePair("verifyToken", verifyToken),
                    new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
                });
                int status = httpClient.executeMethod(post);
                return status == 302 ? codeFromRedirect(post) : null;
            } finally {
                post.releaseConnection();
            }
        }

        /**
         * Reads the authorization code out of the Location header of a redirect.
         *
         * @throws HttpException if the response has no usable Location header
         */
        private static String codeFromRedirect(PostMethod post) throws HttpException {
            Header locationHeader = post.getResponseHeader("location");
            String location = locationHeader == null ? null : locationHeader.getValue();
            if (location == null) {
                throw new HttpException("Redirect response has no Location header");
            }
            return extractCode(location);
        }

        /**
         * Returns the value of the {@code code} query parameter of {@code location}.
         * When there is no such parameter, falls back to everything after the first
         * {@code '='}.
         */
        private static String extractCode(String location) {
            int queryStart = location.indexOf('?');
            if (queryStart >= 0) {
                String query = location.substring(queryStart + 1);
                int fragmentStart = query.indexOf('#');
                if (fragmentStart >= 0) {
                    query = query.substring(0, fragmentStart);
                }
                String[] pairs = query.split("&");
                for (int i = 0; i < pairs.length; i++) {
                    if (pairs[i].startsWith("code=")) {
                        return pairs[i].substring("code=".length());
                    }
                }
            }
            return location.substring(location.indexOf("=") + 1);
        }

        /**
         * Performs the prelogin request.
         *
         * @param unameBase64 Base64-encoded account name
         * @return the fields of the prelogin response
         * @throws IOException if the request fails or the response cannot be parsed
         */
        private HashMap<String, String> pubKeyMap(String unameBase64) throws IOException {
            String url = "https://login.sina.com.cn/sso/prelogin.php?"
                    + "entry=openapi&"
                    + "callback=sinaSSOController.preloginCallBack&"
                    + "su=" + unameBase64 + "&"
                    + "rsakt=mod&"
                    + "checkpin=1&"
                    + "client=ssologin.js(v1.4.5)"
                    + "&_=" + new Date().getTime();
            return getParaFromResult(get(url));
        }

        /**
         * Tells whether the prelogin response asks for a captcha.
         *
         * @param pubkeyMap fields of the prelogin response; may be {@code null}
         * @return the captcha image URL if {@code showpin} is 1, otherwise {@code null}
         */
        private String getPin(HashMap<String, String> pubkeyMap) {
            if (pubkeyMap == null) {
                return null;
            }
            String showpin = pubkeyMap.get("showpin");
            if (showpin == null || !"1".equals(showpin.trim())) {
                return null;
            }
            return buildPinUrl(pubkeyMap.get("pcid"));
        }

        /**
         * Tells whether the login response asks for a captcha after all.
         *
         * @return the captcha image URL if the response's {@code reason} is the
         *         captcha request message, otherwise {@code null}
         */
        private String isHasPinAgain(HashMap<String, String> pubkeyMap,
                HashMap<String, String> ticketMap) {
            if (pubkeyMap == null || ticketMap == null) {
                return null;
            }
            // Means "For the safety of your account, please enter the verification code".
            String str = "\u4e3a\u4e86\u60a8\u7684\u5e10\u53f7\u5b89"
                    + "\u5168\uff0c\u8bf7\u8f93\u5165\u9a8c\u8bc1\u7801";
            if (!str.equals(ticketMap.get("reason"))) {
                return null;
            }
            return buildPinUrl(pubkeyMap.get("pcid"));
        }

        /**
         * Builds a fresh captcha image URL.
         *
         * @param pcid value from the prelogin response
         * @return the URL, or {@code null} if {@code pcid} is {@code null}
         */
        public String getVCode(String pcid) {
            return pcid == null ? null : buildPinUrl(pcid);
        }

        /** Builds the captcha URL with a random cache-busting {@code r} parameter. */
        private static String buildPinUrl(String pcid) {
            // Cast to long so the number is written as plain digits, not in the
            // exponent notation a double gets when concatenated.
            long r = (long) (Math.random() * 100000000);
            return PIN_URL + "r=" + r + "&s=0" + "&p=" + pcid;
        }

        /**
         * Downloads the captcha image to {@code vc.jpg} in the working directory.
         * Failures are printed and otherwise ignored.
         *
         * @param url captcha image URL
         */
        public void saveVCodeImg(String url) {
            GetMethod getImages = new GetMethod(url);
            try {
                int status = httpClient.executeMethod(getImages);
                if (status == HttpStatus.SC_OK) {
                    byte[] image = getImages.getResponseBody();
                    if (image != null) {
                        FileOutputStream outputStream = new FileOutputStream("vc.jpg");
                        try {
                            outputStream.write(image);
                        } finally {
                            outputStream.close();
                        }
                    }
                }
            } catch (IOException e) {
                // HttpException is an IOException too; saving the image is best effort.
                e.printStackTrace();
            } finally {
                getImages.releaseConnection();
            }
        }

        /**
         * Performs the SSO login without a captcha and returns the response fields,
         * including the ticket.
         *
         * @param usernameBase64 Base64-encoded account name
         * @param sp             RSA-encrypted password
         * @param pubkeyMap      fields of the prelogin response
         * @return the fields of the login response
         * @throws IOException if the request fails or the response cannot be parsed
         */
        private HashMap<String, String> getTicket(String usernameBase64, String sp,
                HashMap<String, String> pubkeyMap) throws IOException {
            String url = buildLoginUrl("", "", usernameBase64, pubkeyMap.get("servertime"),
                    pubkeyMap.get("nonce"), pubkeyMap.get("rsakv"), sp);
            return getParaFromResult(get(url));
        }

        /**
         * Performs the SSO login with a captcha answer and returns the response
         * fields, including the ticket.
         *
         * @param usernameBase64 Base64-encoded account name
         * @param sp             RSA-encrypted password
         * @param pin            captcha text
         * @param pcid           value from the prelogin response
         * @param servertime     value from the prelogin response
         * @param nonce          value from the prelogin response
         * @param rsakv          value from the prelogin response
         * @return the fields of the login response
         * @throws Exception if the request fails or the response cannot be parsed
         */
        public HashMap<String, String> getTicket(String usernameBase64, String sp, String pin,
                String pcid, String servertime, String nonce, String rsakv) throws Exception {
            String url = buildLoginUrl("pcid=" + pcid + "&", pin, usernameBase64, servertime,
                    nonce, rsakv, sp);
            return getParaFromResult(get(url));
        }

        /**
         * Assembles the login URL shared by both {@code getTicket} variants.
         *
         * @param pcidSegment either empty or {@code "pcid=...&"}
         * @param door        captcha text, empty when there is none
         */
        private static String buildLoginUrl(String pcidSegment, String door, String su,
                String servertime, String nonce, String rsakv, String sp) {
            return LOGIN_URL
                    + "entry=openapi&"
                    + "gateway=1&"
                    + "from=&"
                    + "savestate=0&"
                    + "useticket=1&"
                    + "pagerefer=&"
                    + pcidSegment
                    + "ct=1800&"
                    + "s=1&"
                    + "vsnf=1&"
                    + "vsnval=&"
                    + "door=" + door + "&"
                    + "su=" + su + "&"
                    + "service=miniblog&"
                    + "servertime=" + servertime + "&"
                    + "nonce=" + nonce + "&"
                    + "pwencode=rsa&"
                    + "rsakv=" + rsakv + "&"
                    + "sp=" + sp + "&"
                    + "encoding=UTF-8&"
                    + "callback=sinaSSOController.loginCallBack&"
                    + "cdult=2&"
                    + "domain=weibo.com&"
                    + "prelt=37&"
                    + "returntype=TEXT&"
                    + "client=ssologin.js(v1.4.5)&"
                    + "_=" + new Date().getTime();
        }

        /**
         * Extracts the top-level fields of the JSON object embedded in {@code result}.
         * Quoted strings are unquoted and their JSON escapes decoded; any other value
         * (numbers, nested arrays or objects) is stored as its trimmed raw text.
         *
         * @param result response body containing one JSON object, possibly wrapped in
         *               a callback call
         * @return field names mapped to their values
         * @throws IOException if {@code result} is {@code null} or contains no object
         */
        private HashMap<String, String> getParaFromResult(String result) throws IOException {
            if (result == null) {
                throw new IOException("Empty response");
            }
            int open = result.indexOf('{');
            int close = result.lastIndexOf('}');
            if (open < 0 || close < open) {
                throw new IOException("Response contains no JSON object");
            }
            String body = result.substring(open + 1, close);

            HashMap<String, String> hm = new HashMap<String, String>();
            boolean inString = false;
            int depth = 0;
            int pairStart = 0;
            int colon = -1;
            for (int i = 0; i < body.length(); i++) {
                char c = body.charAt(i);
                if (inString) {
                    if (c == '\\') {
                        i++; // the escaped character can never end the string
                    } else if (c == '"') {
                        inString = false;
                    }
                } else if (c == '"') {
                    inString = true;
                } else if (c == '[' || c == '{') {
                    depth++;
                } else if (c == ']' || c == '}') {
                    depth--;
                } else if (depth == 0 && c == ':' && colon < 0) {
                    // Only the first top-level colon separates name and value.
                    colon = i;
                } else if (depth == 0 && c == ',') {
                    putPair(hm, body, pairStart, colon, i);
                    pairStart = i + 1;
                    colon = -1;
                }
            }
            putPair(hm, body, pairStart, colon, body.length());
            return hm;
        }

        /** Stores one {@code name:value} slice of {@code body}; slices without a colon are skipped. */
        private static void putPair(HashMap<String, String> target, String body, int start,
                int colon, int end) {
            if (colon < start) {
                return;
            }
            target.put(cleanJsonToken(body.substring(start, colon)),
                    cleanJsonToken(body.substring(colon + 1, end)));
        }

        /** Trims the token and, if it is a quoted string, removes the quotes and decodes escapes. */
        private static String cleanJsonToken(String raw) {
            String token = raw.trim();
            if (token.length() >= 2 && token.charAt(0) == '"'
                    && token.charAt(token.length() - 1) == '"') {
                return unescapeJson(token.substring(1, token.length() - 1));
            }
            return token;
        }

        /** Decodes JSON string escapes, including four-digit hexadecimal code unit escapes. */
        private static String unescapeJson(String s) {
            if (s.indexOf('\\') < 0) {
                return s;
            }
            StringBuilder out = new StringBuilder(s.length());
            for (int i = 0; i < s.length(); i++) {
                char c = s.charAt(i);
                if (c != '\\' || i + 1 >= s.length()) {
                    out.append(c);
                    continue;
                }
                char esc = s.charAt(++i);
                switch (esc) {
                    case 'b':
                        out.append('\b');
                        break;
                    case 'f':
                        out.append('\f');
                        break;
                    case 'n':
                        out.append('\n');
                        break;
                    case 'r':
                        out.append('\r');
                        break;
                    case 't':
                        out.append('\t');
                        break;
                    case 'u':
                        int codeUnit = hex4(s, i + 1);
                        if (codeUnit >= 0) {
                            out.append((char) codeUnit);
                            i += 4;
                        } else {
                            // Not a valid escape; keep the text as it was.
                            out.append("\\u");
                        }
                        break;
                    default:
                        // Quote, backslash and slash stand for themselves.
                        out.append(esc);
                        break;
                }
            }
            return out.toString();
        }

        /** Parses four hexadecimal digits starting at {@code from}; returns -1 if they are not there. */
        private static int hex4(String s, int from) {
            if (from + 4 > s.length()) {
                return -1;
            }
            int value = 0;
            for (int i = from; i < from + 4; i++) {
                int digit = Character.digit(s.charAt(i), 16);
                if (digit < 0) {
                    return -1;
                }
                value = (value << 4) | digit;
            }
            return value;
        }

        /**
         * Fetches {@code url} and returns the response body decoded as UTF-8.
         *
         * @param url address to request
         * @return the response body
         * @throws IOException if the request fails, the status is not 200 or the body is empty
         */
        private String get(String url) throws IOException {
            GetMethod getMethod = new GetMethod(url);
            try {
                int status = httpClient.executeMethod(getMethod);
                // Report the path only: the query string carries credentials.
                if (status != HttpStatus.SC_OK) {
                    throw new HttpException("Unexpected HTTP status " + status + " from "
                            + getMethod.getPath());
                }
                byte[] body = getMethod.getResponseBody();
                if (body == null) {
                    throw new HttpException("Empty response body from " + getMethod.getPath());
                }
                return new String(body, "UTF-8");
            } finally {
                getMethod.releaseConnection();
            }
        }

        /**
         * Loads {@code config.properties} from the context class loader.
         *
         * @return the loaded properties; possibly incomplete if reading failed (the
         *         error is printed)
         * @throws IllegalStateException if the resource does not exist
         */
        private Properties initProperties() {
            InputStream in = Thread.currentThread().getContextClassLoader()
                    .getResourceAsStream("config.properties");
            if (in == null) {
                throw new IllegalStateException("config.properties not found on the classpath");
            }
            Properties prop = new Properties();
            try {
                prop.load(in);
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close of a read-only stream.
                }
            }
            return prop;
        }

        /**
         * Console demo: logs in, asks for the captcha text on standard input when one
         * is required (retrying once), and prints the token.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            WeiboLoginer loginer = new WeiboLoginer();
            LoginParams loginParams = loginer.doLogin("", "");

            // No captcha needed.
            if (loginParams.getCode() != null) {
                System.out.println(SinaWeiboOAuth.getToken(loginParams.getCode()));
                return;
            }

            // Captcha needed.
            String pcid = loginParams.getPcid();
            String nonce = loginParams.getNonce();
            String rsakv = loginParams.getRsakv();
            String servertime = loginParams.getServertime();
            String sp = loginParams.getSp();

            System.err.println(loginParams.getImgUrl());
            // Fetch the captcha address once more.
            System.err.println(loginer.getVCode(pcid));

            Scanner input = new Scanner(System.in);
            String pin = input.nextLine();
            LoginParams loginResult =
                    loginer.doLoginByPin("", sp, pin, pcid, servertime, nonce, rsakv);
            if (!loginResult.isLogin()) {
                System.err.println("验证码错误!重新录入");
                // Fetch and save a new captcha (test).
                loginer.saveVCodeImg(loginer.getVCode(pcid));
                String pin1 = input.nextLine();
                loginResult = loginer.doLoginByPin("", sp, pin1, pcid, servertime, nonce, rsakv);
            }
            System.out.println(SinaWeiboOAuth.getToken(loginResult.getCode()));
        }
    }

    参考地址 http://www.cnblogs.com/zhengbing/p/3459249.html

  • 相关阅读:
    .net注册iis
    hdu 1081To The Max
    hdu 1312Red and Black
    hdu 1016Prime Ring Problem
    hdu 1159Common Subsequence
    hdu 1372Knight Moves
    hdu 1686Oulipo
    hdu 1241Oil Deposits
    hdu 1171Big Event in HDU
    hdu 4006The kth great number
  • 原文地址:https://www.cnblogs.com/liinux/p/5558969.html
Copyright © 2011-2022 走看看