zoukankan      html  css  js  c++  java
  • httpclient:Ip 代理

    参考:http://blog.csdn.net/sdfiiiiii/article/details/70432060  http://blog.csdn.net/qy20115549/article/details/54945974

    第一篇博客可以获取 http://www.xicidaili.com/ 网站上所有的代理ip,并测试其是否可用(测试结果貌似不是很准),可用的代理ip会放到一个list中

    第二篇博客是直接将代理ip设置进代码内,可以用作测试ip可不可用

    第一篇博客

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.28</version>
    </dependency>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    import com.alibaba.fastjson.JSONObject;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * 获取代理IP,需要
     * com.alibaba.fastjson.JSONObject以及Jsoup
     */
    public class ProxyCralwerUnusedVPN {
    
        ThreadLocal<Integer> localWantedNumber = new ThreadLocal<Integer>();
        ThreadLocal<List<ProxyInfo>> localProxyInfos = new ThreadLocal<List<ProxyInfo>>();
    
        public static void main(String[] args) {
            ProxyCralwerUnusedVPN proxyCrawler = new ProxyCralwerUnusedVPN();
            /**
             * 想要获取的代理IP个数,由需求方自行指定。(如果个数太多,将导致返回变慢)
             */
            proxyCrawler.startCrawler(1);
        }
    
        /**
         * 暴露给外部模块调用的入口
         * @param wantedNumber 调用方期望获取到的代理IP个数
         */
        public String startCrawler(int wantedNumber) {
            localWantedNumber.set(wantedNumber);
    
            kuaidailiCom("http://www.xicidaili.com/nn/", 15);
            kuaidailiCom("http://www.xicidaili.com/nt/", 15);
            kuaidailiCom("http://www.xicidaili.com/wt/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/inha/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/intr/", 15);
            kuaidailiCom("http://www.kuaidaili.com/free/outtr/", 15);
    
            /**
             * 构造返回数据
             */
            ProxyResponse response = new ProxyResponse();
            response.setSuccess("true");
            Map<String, Object> dataInfoMap = new HashMap<String, Object>();
            dataInfoMap.put("numFound", localProxyInfos.get().size());
            dataInfoMap.put("pageNum", 1);
            dataInfoMap.put("proxy", localProxyInfos.get());
            response.setData(dataInfoMap);
            String responseString = JSONObject.toJSON(response).toString();
            System.out.println(responseString);
            return responseString;
        }
    
        private void kuaidailiCom(String baseUrl, int totalPage) {
            String ipReg = "\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} \d{1,6}";
            Pattern ipPtn = Pattern.compile(ipReg);
    
            for (int i = 1; i < totalPage; i++) {
                if (getCurrentProxyNumber() >= localWantedNumber.get()) {
                    return;
                }
                try {
                    Document doc = Jsoup.connect(baseUrl + i + "/")
                            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                            .header("Accept-Encoding", "gzip, deflate, sdch")
                            .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
                            .header("Cache-Control", "max-age=0")
                            .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
                            .header("Cookie", "Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1462812244; _gat=1; _ga=GA1.2.1061361785.1462812244")
                            .header("Host", "www.kuaidaili.com")
                            .header("Referer", "http://www.kuaidaili.com/free/outha/")
                            .timeout(30 * 1000)
                            .get();
                    Matcher m = ipPtn.matcher(doc.text());
    
                    while (m.find()) {
                        if (getCurrentProxyNumber() >= localWantedNumber.get()) {
                            break;
                        }
                        String[] strs = m.group().split(" ");
                        if (checkProxy(strs[0], Integer.parseInt(strs[1]))) {
                            System.out.println("获取到可用代理IP	" + strs[0] + "	" + strs[1]);
                            addProxy(strs[0], strs[1], "http");
                        }
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    
        private static boolean checkProxy(String ip, Integer port) {
            try {
                //http://1212.ip138.com/ic.asp 可以换成任何比较快的网页
                Jsoup.connect("http://1212.ip138.com/ic.asp")
                        .timeout(2 * 1000)
                        .proxy(ip, port)
                        .get();
                return true;
            } catch (Exception e) {
                return false;
            }
        }
    
        private int getCurrentProxyNumber() {
            List<ProxyInfo> proxyInfos = localProxyInfos.get();
            if (proxyInfos == null) {
                proxyInfos = new ArrayList<ProxyInfo>();
                localProxyInfos.set(proxyInfos);
                return 0;
            }
            else {
                return proxyInfos.size();
            }
        }
        private void addProxy(String ip, String port, String protocol){
            List<ProxyInfo> proxyInfos = localProxyInfos.get();
            if (proxyInfos == null) {
                proxyInfos = new ArrayList<ProxyInfo>();
                proxyInfos.add(new ProxyInfo(ip, port, protocol));
            }
            else {
                proxyInfos.add(new ProxyInfo(ip, port, protocol));
            }
        }
    }
    
    
    
    /**
     * Mutable bean describing one proxy endpoint: address, port, type and
     * optional credentials. Exposed through getters/setters so it can be
     * serialized as a bean.
     */
    class ProxyInfo {
        private String userName;
        private String ip;
        private String password;
        private String type;
        private String port;
        private int is_internet;

        /**
         * Creates a proxy entry with empty credentials and {@code is_internet} set to 1.
         *
         * @param ip   proxy host
         * @param port proxy port, kept as text
         * @param type proxy type string
         */
        public ProxyInfo(String ip, String port, String type) {
            this.userName = "";
            this.password = "";
            this.is_internet = 1;
            this.ip = ip;
            this.port = port;
            this.type = type;
        }

        // --- address -------------------------------------------------------

        /** @return the proxy host */
        public String getIp() {
            return this.ip;
        }

        /** @param ip the proxy host */
        public void setIp(String ip) {
            this.ip = ip;
        }

        /** @return the proxy port as text */
        public String getPort() {
            return this.port;
        }

        /** @param port the proxy port as text */
        public void setPort(String port) {
            this.port = port;
        }

        /** @return the proxy type string */
        public String getType() {
            return this.type;
        }

        /** @param type the proxy type string */
        public void setType(String type) {
            this.type = type;
        }

        // --- credentials ---------------------------------------------------

        /** @return the user name; empty string unless set */
        public String getUserName() {
            return this.userName;
        }

        /** @param userName the user name */
        public void setUserName(String userName) {
            this.userName = userName;
        }

        /** @return the password; empty string unless set */
        public String getPassword() {
            return this.password;
        }

        /** @param password the password */
        public void setPassword(String password) {
            this.password = password;
        }

        // --- flag ----------------------------------------------------------

        /** @return the is_internet flag; 1 unless set */
        public int getIs_internet() {
            return this.is_internet;
        }

        /** @param is_internet the is_internet flag */
        public void setIs_internet(int is_internet) {
            this.is_internet = is_internet;
        }
    }
    
    /**
     * Response envelope: a success marker plus a free-form data map.
     * Both properties are unset ({@code null}) until assigned.
     */
    class ProxyResponse {
        private Map<String, Object> data;
        private String success;

        /** @return the success marker, or {@code null} if never set */
        public String getSuccess() {
            return this.success;
        }

        /** @param success the success marker to store */
        public void setSuccess(String success) {
            this.success = success;
        }

        /** @return the payload map exactly as stored (no copy), or {@code null} if never set */
        public Map<String, Object> getData() {
            return this.data;
        }

        /** @param data the payload map; the reference is stored as-is */
        public void setData(Map<String, Object> data) {
            this.data = data;
        }
    }

     第二篇博客

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.UnsupportedEncodingException;
    import java.net.InetSocketAddress;
    import java.net.MalformedURLException;
    import java.net.Proxy;
    import java.net.URL;
    import java.net.URLConnection;
    
    /**
     * Fetches a page through a given HTTP proxy; useful for checking whether a
     * proxy ip/port actually works.
     */
    public class GetHtml {
        /** Milliseconds allowed for establishing the connection through the proxy. */
        private static final int CONNECT_TIMEOUT_MS = 3000;
        /** Milliseconds allowed for each read, so a stalled proxy cannot hang the caller. */
        private static final int READ_TIMEOUT_MS = 10000;

        public static void main(String[] args) throws UnsupportedEncodingException {
            // Proxy ip, proxy port, and the URL to fetch.
            gethtml("121.61.101.222",808,"http://club.autohome.com.cn/bbs/forum-c-2533-1.html?orderby=dateline&qaType=-1");
        }

        /**
         * Downloads {@code url} through the HTTP proxy at {@code ip:port},
         * prints the page text and returns it.
         *
         * @param ip   proxy host
         * @param port proxy port
         * @param url  page to fetch
         * @return the page text decoded as gb2312, or an empty string when the
         *         URL is malformed or the request through the proxy fails
         * @throws UnsupportedEncodingException if the gb2312 charset is unavailable
         */
        public static String gethtml(String ip,int port,String url) throws UnsupportedEncodingException{
            URL target;
            try {
                target = new URL(url);
            } catch (MalformedURLException e1) {
                e1.printStackTrace();
                // Nothing to fetch; previously this fell through to a
                // NullPointerException that was misreported as a proxy failure.
                return "";
            }
            // Address of the proxy server.
            InetSocketAddress addr = new InetSocketAddress(ip, port);
            Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); // http proxy
            InputStream in = null;
            try {
                URLConnection conn = target.openConnection(proxy);
                conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
                conn.setReadTimeout(READ_TIMEOUT_MS);
                in = conn.getInputStream();
            } catch (Exception e) {
                // Unusable proxy: report which one and fall through with no stream.
                System.out.println("ip " + ip + ":" + port + " is not available");
            }

            String s = convertStreamToString(in);
            System.out.println(s);
            return s;
        }

        /**
         * Reads the whole stream as gb2312 text, terminating every line with
         * {@code '\n'}, and closes the stream.
         *
         * @param is stream to drain; {@code null} yields an empty string
         * @return the decoded text; whatever was read so far if an I/O error occurs
         * @throws UnsupportedEncodingException if the gb2312 charset is unavailable
         */
        public static String convertStreamToString(InputStream is) throws UnsupportedEncodingException {
            if (is == null) {
                return "";
            }
            StringBuilder sb = new StringBuilder();
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gb2312"));
                String line;
                while ((line = reader.readLine()) != null) {
                    // Real newline; the original appended the two characters "/n".
                    sb.append(line).append('\n');
                }
            } catch (UnsupportedEncodingException e) {
                // Part of the declared contract; let the caller see it.
                throw e;
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Always release the stream, including when the charset lookup fails.
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            return sb.toString();
        }
    }
  • 相关阅读:
    jsp mysql 配置线程池
    服务端 模拟 检测 攻击。。乱写
    硕思闪客精灵 7.2 破解版
    unity UnityAwe 插件
    smartfoxserver 2x 解决 Math NAN
    unity 断点下载
    java 监听文件目录修改
    wind7 64 setup appjs
    sfs2x 修改jvm 内存
    unity ngui 解决图层问题
  • 原文地址:https://www.cnblogs.com/Michael2397/p/7821930.html
Copyright © 2011-2022 走看看