zoukankan      html  css  js  c++  java
  • HttpClient+jsoup登录+解析 163邮箱

    找了几个,只有这个靠谱,用的是httpclient4,另外还需要commons-lang和jsoup包

    http://jsoup.org/
     

    http://www.oschina.net/code/snippet_128625_12592?p=2

    ————————————————————————————————————————————————————————————

    如题:
    只用jsoup解析页面非常方便,但是用jsoup做登录就比较麻烦,反正我不知道怎么做。
    HttpClient做登录比较方便,因此用HttpClient模拟登录获取html内容,再用jsoup做解析,是一个非常完美的组合。
    替换成自己的163邮箱试一下吧。

    HttpClientHelper 封装

    import java.io.IOException;
    import java.security.cert.CertificateException;
    import java.security.cert.X509Certificate;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    import javax.net.ssl.SSLContext;
    import javax.net.ssl.TrustManager;
    import javax.net.ssl.X509TrustManager;

    import org.apache.commons.lang.StringUtils;
    import org.apache.http.Header;
    import org.apache.http.HttpResponse;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.entity.UrlEncodedFormEntity;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.conn.ClientConnectionManager;
    import org.apache.http.conn.scheme.Scheme;
    import org.apache.http.conn.scheme.SchemeRegistry;
    import org.apache.http.conn.ssl.SSLSocketFactory;
    import org.apache.http.cookie.Cookie;
    import org.apache.http.impl.client.BasicCookieStore;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.message.BasicHeader;
    import org.apache.http.message.BasicNameValuePair;
    import org.apache.http.protocol.BasicHttpContext;
    import org.apache.http.protocol.HttpContext;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    /**
     * HttpClient 封装
     * 
     * 
    @author bangis.wangdf
     
    */
    public class HttpClientHelper {

        private static Logger    LOG              = LoggerFactory.getLogger(HttpClientHelper.class);
        private HttpClient       httpclient       = new DefaultHttpClient();
        private HttpContext      localContext     = new BasicHttpContext();
        private BasicCookieStore basicCookieStore = new BasicCookieStore();                          // cookie存储用来完成登录后记录相关信息

        private int              TIME_OUT         = 3;                                              // 连接超时时间

        public HttpClientHelper() {
            instance();
        }

        /**
         * 启用cookie存储
         
    */
        private void instance() {
            httpclient.getParams().setIntParameter("http.socket.timeout", TIME_OUT * 1000);
            localContext.setAttribute("http.cookie-store", basicCookieStore);// Cookie存储
        }

        /**
         * 
    @param ssl boolean=true 支持https网址,false同默认构造
         
    */
        public HttpClientHelper(boolean ssl) {
            instance();
            if (ssl) {
                try {
                    X509TrustManager tm = new X509TrustManager() {

                        public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                        }

                        public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                        }

                        public X509Certificate[] getAcceptedIssuers() {
                            return null;
                        }
                    };
                    SSLContext ctx = SSLContext.getInstance("TLS");
                    ctx.init(nullnew TrustManager[] { tm }, null);
                    SSLSocketFactory ssf = new SSLSocketFactory(ctx);
                    ClientConnectionManager ccm = httpclient.getConnectionManager();
                    SchemeRegistry sr = ccm.getSchemeRegistry();
                    sr.register(new Scheme("https", ssf, 443));
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

        /**
         * 
    @param url
         * 
    @param headers 指定headers
         * 
    @return
         
    */
        public HttpResult get(String url, Header... headers) {
            HttpResponse response;
            HttpGet httpget = new HttpGet(url);
            if (headers != null) {
                for (Header h : headers) {
                    httpget.addHeader(h);
                }
            } else {// 如不指定则使用默认
                Header header = new BasicHeader(
                                                "User-Agent",
                                                "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;  .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; InfoPath.2)");
                httpget.addHeader(header);
            }
            HttpResult httpResult = HttpResult.empty();
            try {
                response = httpclient.execute(httpget, localContext);
                httpResult = new HttpResult(localContext, response);
            } catch (IOException e) {
                LOG.error(" get ", e);
                httpget.abort();
            }
            return httpResult;
        }

        public HttpResult post(String url, Map<String, String> data, Header... headers) {
            HttpResponse response;
            HttpPost httppost = new HttpPost(url);
            String contentType = null;
            if (headers != null) {
                int size = headers.length;
                for (int i = 0; i < size; ++i) {
                    Header h = (Header) headers[i];
                    if (!(h.getName().startsWith("$x-param"))) {
                        httppost.addHeader(h);
                    }
                    if ("Content-Type".equalsIgnoreCase(h.getName())) {
                        contentType = h.getValue();
                    }
                }

            }
            if (contentType != null) {
                httppost.setHeader("Content-Type", contentType);
            } else if (data != null) {
                httppost.setHeader("Content-Type", "application/x-www-form-urlencoded");
            }

            List<NameValuePair> formParams = new ArrayList<NameValuePair>();
            for (String key : data.keySet()) {
                formParams.add(new BasicNameValuePair(key, (String) data.get(key)));
            }
            HttpResult httpResult = HttpResult.empty();
            try {
                UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formParams, "UTF-8");
                httppost.setEntity(entity);
                response = httpclient.execute(httppost, localContext);
                httpResult = new HttpResult(localContext, response);
            } catch (IOException e) {
                LOG.error(" post ", e);
                httppost.abort();
            } finally {
            }
            return httpResult;
        }

        public String getCookie(String name, String... domain) {
            String dm = "";
            if (domain != null && domain.length >= 1) {
                dm = domain[0];
            }
            for (Cookie c : basicCookieStore.getCookies()) {
                if (StringUtils.equals(name, c.getName()) && StringUtils.equals(dm, c.getDomain())) {
                    return c.getValue();
                }
            }
            return null;
        }

        public void pringCookieAll() {
            for (Cookie c : basicCookieStore.getCookies()) {
                System.out.println(c);
            }
        }
    }

    对HttpClient返回的结果进一步封装 

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UnsupportedEncodingException;
    import java.security.cert.CertificateException;
    import java.security.cert.X509Certificate;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.zip.GZIPInputStream;
    import java.util.zip.InflaterInputStream;

    import javax.net.ssl.SSLContext;
    import javax.net.ssl.TrustManager;
    import javax.net.ssl.X509TrustManager;

    import org.apache.commons.lang.StringUtils;
    import org.apache.http.Header;
    import org.apache.http.HttpHost;
    import org.apache.http.HttpResponse;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.HttpClient;
    import org.apache.http.client.entity.UrlEncodedFormEntity;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.client.methods.HttpUriRequest;
    import org.apache.http.conn.ClientConnectionManager;
    import org.apache.http.conn.scheme.Scheme;
    import org.apache.http.conn.scheme.SchemeRegistry;
    import org.apache.http.conn.ssl.SSLSocketFactory;
    import org.apache.http.cookie.Cookie;
    import org.apache.http.impl.client.BasicCookieStore;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.http.message.BasicHeader;
    import org.apache.http.message.BasicNameValuePair;
    import org.apache.http.protocol.BasicHttpContext;
    import org.apache.http.protocol.HttpContext;
    import org.apache.http.util.EntityUtils;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    /**
     * 对HttpClient返回的结果进一步封装
     * 
    @author bangis.wangdf
     *
     
    */
    public class HttpResult {
        
        private static Logger LOG = LoggerFactory.getLogger(HttpResult.class);
        
        private static Pattern headerCharsetPattern = Pattern.compile(
                "charset=((gb2312)|(gbk)|(utf-8))", 2);
        private static Pattern pattern = Pattern
                .compile(
                        "<meta[^>]*content=(['"])?[^>]*charset=((gb2312)|(gbk)|(utf-8))\1[^>]*>",
                        2);
        private String headerCharset;
        private String headerContentType;
        private String headerContentEncoding;
        private List<Header> headers;
        private String metaCharset;
        private byte[] response;
        private String responseUrl;
        private int statuCode = -1;
        private static final int BUFFER_SIZE = 4096;

        public static HttpResult empty() {
            return new HttpResult();
        }

        public String getHeaderCharset() {
            return this.headerCharset;
        }

        public String getHeaderContentType() {
            return this.headerContentType;
        }

        public final List<Header> getHeaders() {
            return this.headers;
        }

        public String getHtml() {
            try {
                return getText();
            } catch (UnsupportedEncodingException e) {
                LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
            }
            return "";
        }
        
        public String getHtml(String encoding) {
            try {
                return getText(encoding);
            } catch (UnsupportedEncodingException e) {
                LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
            }
            return "";
        }

        public String getMetaCharset() {
            return this.metaCharset;
        }

        public byte[] getResponse() {
            return Arrays.copyOf(this.response, this.response.length);
        }

        public String getResponseUrl() {
            return this.responseUrl;
        }

        public int getStatuCode() {
            return this.statuCode;
        }

        public String getText() throws UnsupportedEncodingException {
            return getText("");
        }

        public String getText(String encoding) throws UnsupportedEncodingException {
            if (this.response == null){
                return "";
            }
            String encodingStr = encoding;
            if (StringUtils.isBlank(encoding)){
                encodingStr = this.metaCharset;
            }

            if (StringUtils.isBlank(encoding)){
                encodingStr = this.headerCharset;
            }

            if (StringUtils.isBlank(encoding)){
                encodingStr = "UTF-8";
            }

            return new String(this.response, encodingStr);
        }

        private String getCharsetFromMeta() {
            StringBuilder builder = new StringBuilder();
            String charset = "";
            for (int i = 0; (i < this.response.length) && ("".equals(charset)); ++i) {
                char c = (charthis.response[i];
                switch (c) {
                case '<':
                    builder.delete(0, builder.length());
                    builder.append(c);
                    break;
                case '>':
                    if (builder.length() > 0){
                        builder.append(c);
                    }
                    String meta = builder.toString();

                    if (meta.toLowerCase().startsWith("<meta")){
                        charset = getCharsetFromMeta(meta);
                    }
                    break;
                case '=':
                default:
                    if (builder.length() > 0){
                        builder.append(c);
                    }
                }

            }

            return charset;
        }

        private String getCharsetFromMeta(String meta) {
            if (StringUtils.isBlank(meta)){
                return "";
            }
            Matcher m = pattern.matcher(meta);
            if (m.find()){
                return m.group(2);
            }
            return "";
        }

        private void getHttpHeaders(HttpResponse httpResponse) {
            String headerName = "";
            String headerValue = "";
            int index = -1;

            Header[] rspHeaders = httpResponse.getAllHeaders();
            for (int i = 0; i < rspHeaders.length; ++i) {
                Header header = rspHeaders[i];
                this.headers.add(header);

                headerName = header.getName();
                if ("Content-Type".equalsIgnoreCase(headerName)) {
                    headerValue = header.getValue();
                    index = headerValue.indexOf(';');
                    if (index > 0){
                        this.headerContentType = headerValue.substring(0, index);
                    }
                    Matcher m = headerCharsetPattern.matcher(headerValue);
                    if (m.find()){
                        this.headerCharset = m.group(1);
                    }
                }

                if ("Content-Encoding".equalsIgnoreCase(headerName)){
                    this.headerContentEncoding = header.getValue();
                }
            }
        }

        private void getResponseUrl(HttpContext httpContext) {
            HttpHost target = (HttpHost) httpContext
                    .getAttribute("http.target_host");

            HttpUriRequest req = (HttpUriRequest) httpContext
                    .getAttribute("http.request");

            this.responseUrl = target.toString() + req.getURI().toString();
        }

        public HttpResult(HttpContext httpContext, HttpResponse httpResponse) {
            this.headers = new ArrayList<Header>();

            this.statuCode = httpResponse.getStatusLine().getStatusCode();

            if (httpContext != null) {
                getResponseUrl(httpContext);
            }

            if (httpResponse != null) {
                getHttpHeaders(httpResponse);
                try {
                    if (("gzip".equalsIgnoreCase(this.headerContentEncoding))
                            || ("deflate".equalsIgnoreCase(this.headerContentEncoding))) {
                        GZIPInputStream is = new GZIPInputStream(httpResponse.getEntity().getContent());
                        ByteArrayOutputStream os = new ByteArrayOutputStream();
                        byte[] buffer = new byte[BUFFER_SIZE];
                        int count = 0;
                        while ((count = is.read(buffer)) > 0){
                            os.write(buffer, 0, count);
                        }
                        this.response = os.toByteArray();
                        os.close();
                        is.close();
                    }else{
                        this.response = EntityUtils.toByteArray(httpResponse.getEntity());
                    }
                } catch (Exception e) {
                    LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
                }
                if (this.response != null){
                    this.metaCharset = getCharsetFromMeta();
                }
            }
        }

        private HttpResult() {
        }
    }

     

    Mail163Test 

    import java.text.MessageFormat;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.http.Header;
    import org.apache.http.message.BasicHeader;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;

    public class Mail163Test {
        public static final String SESSION_INIT = "http://mail.163.com";
        public static final String LOGIN_URL = "https://ssl.mail.163.com/entry/coremail/fcg/ntesdoor2?df=webmail163&from=web&funcid=loginone&iframe=1&language=-1&net=t&passtype=1&product=mail163&race=-2_-2_-2_db&style=-1&uid=";
        public static final String MAIL_LIST_URL = "http://twebmail.mail.163.com/js4/s?sid={0}&func=mbox:listMessages";
        /**
         * 
    @param args
         
    */
        public static void main(String[] args) {
            HttpClientHelper hc = new HttpClientHelper(true);
            HttpResult lr = hc.get(SESSION_INIT);// 目的是得到 csrfToken 类似
            
    // 拼装登录信息
            Map<String, String> data = new HashMap<String, String>();
            data.put("url2", "http://mail.163.com/errorpage/err_163.htm");
            data.put("savelogin", "0");
            data.put("username", "bangis");
            data.put("password", "*******");
            lr = hc.post(LOGIN_URL, data,setHeader());// 执行登录
            Document doc = Jsoup.parse(lr.getHtml());
            String sessionId=doc.select("script").html().split("=")[2];
            sessionId = sessionId.substring(0,sessionId.length()-2);
            data.clear();
            data.put("var", "<?xml version="1.0"?><object><int name="fid">1</int><boolean name="skipLockedFolders">false</boolean><string name="order">date</string><boolean name="desc">true</boolean><int name="start">0</int><int name="limit">50</int><boolean name="topFirst">true</boolean><boolean name="returnTotal">true</boolean><boolean name="returnTag">true</boolean></object>");
            lr = hc.post(MessageFormat.format(MAIL_LIST_URL, sessionId),
                    data,setQueryHeader(sessionId));// 执行登录
            System.out.println(lr.getHtml());
        }
        
        public static Header[] setHeader() {
            Header[] result = { 
                    new BasicHeader("User-Agent","Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"), 
                    new BasicHeader("Accept-Encoding","gzip, deflate"),
                    new BasicHeader("Accept-Language","zh-CN"),
                    new BasicHeader("Cache-Control","no-cache"),
                    new BasicHeader("Connection","Keep-Alive"),
                    new BasicHeader("Content-Type","application/x-www-form-urlencoded"),
                    new BasicHeader("Host","ssl.mail.163.com"),
                    new BasicHeader("Referer","http://mail.163.com/"),
                    new BasicHeader("Accept","text/html, application/xhtml+xml, */*")
                    
            };
            return result;
        }
        public static Header[] setQueryHeader(String sessionId) {
            Header[] result = { 
                    new BasicHeader("User-Agent","Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"), 
                    new BasicHeader("Accept-Encoding","gzip, deflate"),
                    new BasicHeader("Accept-Language","zh-CN"),
                    new BasicHeader("Cache-Control","no-cache"),
                    new BasicHeader("Connection","Keep-Alive"),
                    new BasicHeader("Content-Type","application/x-www-form-urlencoded"),
                    new BasicHeader("Host","twebmail.mail.163.com"),
                    new BasicHeader("Referer","http://twebmail.mail.163.com/js4/index.jsp?sid="+sessionId),
                    new BasicHeader("Accept","text/javascript")
                    
            };
            return result;
        }
    }

  • 相关阅读:
    vs2017默认以管理员运行
    net abp core的appservice中访问httpcontext对象
    .net core 支持apk下载
    EF Core 2.1变化
    .Net 高效开发之不可错过的实用工具
    win10 远程出现身份验证错误 要求的函数不受支持
    分享个百度网盘下载工具
    mysql迁移sqlserver
    2020.08.11 【ABAP随笔】-ITS Mobile 配置
    2020.05.07 【ABAP随笔】- ABAP-SM30删除前检查
  • 原文地址:https://www.cnblogs.com/cuizhf/p/3725952.html
Copyright © 2011-2022 走看看