zoukankan      html  css  js  c++  java
  • httpClient如何接收格式错误的响应头部信息

    Exception in thread "main" org.apache.commons.httpclient.ProtocolException: Unable to parse header: share memory not exist, need create new share memory!
    at org.apache.commons.httpclient.HttpParser.parseHeaders(HttpParser.java:202)
    at org.apache.commons.httpclient.HttpMethodBase.readResponseHeaders(HttpMethodBase.java:1935)
    at org.apache.commons.httpclient.HttpMethodBase.readResponse(HttpMethodBase.java:1737)
    at org.apache.commons.httpclient.HttpMethodBase.execute(HttpMethodBase.java:1098)
    at org.apache.commons.httpclient.HttpMethodDirector.executeWithRetry(HttpMethodDirector.java:398)
    at org.apache.commons.httpclient.HttpMethodDirector.executeMethod(HttpMethodDirector.java:171)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:397)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:323)

    做网页爬虫的时候, 模拟GET请求访问网址, 出现了上面的异常。问了3个前辈, 都说没遇到过, 这可头疼死了。

    一开始也不知道是什么问题, 有人说可能是buffer太小, 建议把大小设置大点。花了一个晚上查网上资料, 终于有点眉目, 见参考网址: http://bbs.csdn.net/topics/390178589

    /**
     * 
     */
    package com.http;
    
    import java.io.IOException;
    
    import org.apache.http.Header;
    import org.apache.http.HttpException;
    import org.apache.http.HttpResponse;
    import org.apache.http.HttpResponseFactory;
    import org.apache.http.HttpVersion;
    import org.apache.http.conn.ClientConnectionOperator;
    import org.apache.http.conn.OperatedClientConnection;
    import org.apache.http.conn.scheme.SchemeRegistry;
    import org.apache.http.impl.conn.BasicClientConnectionManager;
    import org.apache.http.impl.conn.DefaultClientConnection;
    import org.apache.http.impl.conn.DefaultClientConnectionOperator;
    import org.apache.http.impl.conn.DefaultHttpResponseParser;
    import org.apache.http.io.HttpMessageParser;
    import org.apache.http.io.SessionInputBuffer;
    import org.apache.http.message.BasicHeader;
    import org.apache.http.message.BasicHttpResponse;
    import org.apache.http.message.BasicLineParser;
    import org.apache.http.message.BasicStatusLine;
    import org.apache.http.message.LineParser;
    import org.apache.http.params.HttpParams;
    import org.apache.http.util.CharArrayBuffer;
    /**
     * Connection manager whose connections parse HTTP responses leniently.
     *
     * <p>It plugs a custom connection operator into the manager; that operator
     * creates connections whose response parser tolerates a malformed response
     * head and malformed header lines instead of failing the whole request.
     *
     * @author yingzi
     */
    public class MyBasicClientConnectionManager extends BasicClientConnectionManager {
    
        /** Creates a manager using the superclass defaults. */
        public MyBasicClientConnectionManager() {
            super();
        }
        
        /**
         * Supplies the operator that creates lenient connections.
         *
         * @param sr scheme registry handed to the operator
         * @return a {@link MyClientConnectionOperator} bound to {@code sr}
         */
        @Override
        protected ClientConnectionOperator createConnectionOperator( final SchemeRegistry sr) {
            return new MyClientConnectionOperator(sr);
        }
        
        /** Connection that installs the lenient response parser and line parser. */
        class MyClientConnection extends DefaultClientConnection {
            @Override
            protected HttpMessageParser createResponseParser(
                    final SessionInputBuffer buffer,
                    final HttpResponseFactory responseFactory,
                    final HttpParams params) {
                return new MyDefaultHttpResponseParser(buffer, new MyLineParser(),
                        responseFactory, params);
            }
        }
        
        /**
         * Response parser that substitutes an empty {@code HTTP/1.1 200} response
         * when the response head cannot be parsed.
         */
        class MyDefaultHttpResponseParser extends DefaultHttpResponseParser {
            public MyDefaultHttpResponseParser(SessionInputBuffer buffer,
                    LineParser parser, HttpResponseFactory responseFactory,
                    HttpParams params) {
                super(buffer, parser, responseFactory, params);
            }
    
            /**
             * Parses the response head, tolerating protocol/parse errors.
             *
             * @param sessionBuffer buffer the response is read from
             * @return the parsed response, or a placeholder {@code 200} response
             *         with an empty reason phrase if the head is malformed
             * @throws IOException if reading from the connection fails; I/O
             *         failures are never converted into a placeholder response
             * @throws HttpException declared by the overridden method; protocol
             *         errors raised by the superclass are suppressed here
             */
            @Override
            protected HttpResponse parseHead(
                final SessionInputBuffer sessionBuffer) throws IOException, HttpException {
                try {
                    return super.parseHead(sessionBuffer);
                } catch (IOException ex) {
                    // A transport failure is not a formatting problem: reporting
                    // it as a successful 200 would hide a broken connection.
                    throw ex;
                } catch (Exception ex) {
                    // Suppress ParseException / protocol errors caused by a malformed head.
                    return new BasicHttpResponse(new BasicStatusLine(HttpVersion.HTTP_1_1, 200, ""));
                }
            }
        }
        
        /** Operator that hands out {@link MyClientConnection} instances. */
        class MyClientConnectionOperator extends DefaultClientConnectionOperator {
            public MyClientConnectionOperator(final SchemeRegistry sr) {
                super(sr);
            }
        
            @Override
            public OperatedClientConnection createConnection() {
                return new MyClientConnection();
            }
        }
        
        /**
         * Line parser that keeps unparseable header lines instead of failing.
         */
        class MyLineParser extends BasicLineParser {
            /**
             * Parses one header line.
             *
             * @param buffer raw header line
             * @return the parsed header, or a header named {@code "invalid"}
             *         carrying the raw line as its value if parsing fails
             */
            @Override
            public Header parseHeader(final CharArrayBuffer buffer) {
                try {
                    return super.parseHeader(buffer);
                } catch (RuntimeException ex) {
                    // Suppress the ParseException; the raw line is preserved so
                    // callers can still inspect what the server sent.
                    return new BasicHeader("invalid", buffer.toString());
                }
            }
        }
    }
    MyBasicClientConnectionManager

    而我用的是MultiThreadedHttpConnectionManager, 上面的方案不怎么适用。于是我根据日志里的异常堆栈(trace), 查看了httpClient的execute方法的具体代码, 大胆猜想了下, 觉得要把原来的GetMethod换成下面这个自定义的子类:

    /**
     * 
     */
    package com.http;
    
    import java.io.IOException;
    
    import org.apache.commons.httpclient.Header;
    import org.apache.commons.httpclient.HttpConnection;
    import org.apache.commons.httpclient.HttpException;
    import org.apache.commons.httpclient.HttpParser;
    import org.apache.commons.httpclient.HttpState;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    
    
    /**
     * GET method that tolerates a response whose header section cannot be parsed.
     *
     * <p>When header parsing fails with a protocol error, a fixed set of fallback
     * headers is installed so that processing of the response can continue.
     *
     * @author yingzi
     */
    public class MyHttpGetMethod extends org.apache.commons.httpclient.methods.GetMethod {
        private static final Logger log = LoggerFactory.getLogger( MyHttpGetMethod.class );
    
        /** Creates a GET method with no URL set. */
        public MyHttpGetMethod(){
            super();
        }
        
        /**
         * Creates a GET method for the given URL.
         *
         * @param url the URL to request
         */
        public MyHttpGetMethod(String url){
            super(url);
        }
        
        /**
         * Reads the response headers, falling back to default headers if the
         * server sent a header section that cannot be parsed.
         *
         * @param state the HTTP state (not used by this override)
         * @param conn  connection whose response input stream is parsed
         * @throws IOException if reading from the connection fails; only protocol
         *         errors ({@link HttpException}) are suppressed
         * @throws HttpException declared by the overridden method; not thrown for
         *         header parse errors, which are logged and replaced by the
         *         fallback headers
         */
        @Override
        protected void readResponseHeaders(HttpState state, HttpConnection conn)
             throws IOException, HttpException {
            getResponseHeaderGroup().clear();
            Header[] headers;
            try {
                headers = HttpParser.parseHeaders(
                        conn.getResponseInputStream(), getParams().getHttpElementCharset());
            } catch (HttpException ex) {
                // Suppress only the protocol-level parse failure; plain I/O errors
                // and programming errors propagate instead of being masked.
                // NOTE(review): presumably the unread remainder of the header
                // section stays in the stream after a failure - confirm that the
                // body consumer tolerates that.
                log.warn("response header has some error info , can not parse normally.", ex);
                headers = fallbackHeaders();
            }
    
            // Wire logging moved to HttpParser
            getResponseHeaderGroup().setHeaders(headers);
        }
    
        /** Builds a fresh set of the headers assumed when the real ones are unparseable. */
        private static Header[] fallbackHeaders() {
            return new Header[] {
                new Header("Connection","Keep-Alive"),
                new Header("Content-Type","text/html; charset=GB18030"),
                new Header("Keep-Alive","timeout=20"),
                new Header("Cache-control","max-age=3600")
            };
        }
        
    }
  • 相关阅读:
    bzoj 4539 [Hnoi2016]树——主席树+倍增
    bzoj 4137 [FJOI2015]火星商店问题——线段树分治+可持久化01trie树
    bzoj 4025 二分图——线段树分治+LCT
    LOJ 121 「离线可过」动态图连通性——LCT维护删除时间最大生成树 / 线段树分治
    bzoj 3572 [Hnoi2014]世界树——虚树
    bzoj 4650(洛谷 1117) [Noi2016]优秀的拆分——枚举长度的关键点+后缀数组
    洛谷 P3957 跳房子 —— 二分答案+单调队列优化DP
    洛谷 P1578 奶牛浴场 —— 最大子矩形
    bzoj 1510 Kra-The Disks —— 思路
    bzoj 1657 Mooo 奶牛的歌声 —— 单调栈
  • 原文地址:https://www.cnblogs.com/chenyao/p/4083704.html
Copyright © 2011-2022 走看看