zoukankan      html  css  js  c++  java
  • WebClient下载代理的IP+Port获取方法

    在做下载的时候通常会用到代理!

    如下获取下载代理的代码,其中通过cnproxy,heibai还有proxy360得到的代理IP,里面也有些获取的限制,具体怎么解决的见代码吧!

    View Code
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Diagnostics;
    using System.Net;
    using System.IO;
    using System.Threading.Tasks;
    using System.Threading;
    using System.Data.SqlClient;
    using System.Data;
    using HtmlAgilityPack;
    using System.Web;
    using System.Text.RegularExpressions;
    
    namespace ProxyTools
    {
        public class ProxyTest
        {
            // Set to true by the constructor when the reference download returns no data.
            // NOTE(review): static and never reset to false in the visible code, so it stays set
            // for every later instance — confirm this is intended.
            public static bool Error = false;
    
            // Sum of the per-site result counts accumulated in GetProxyFromWeb
            // (counted per site, i.e. before duplicates across sites are merged).
            public int GetProxyTotal = 0;
            // Number of proxies that could be turned into a WebProxy by the constructor.
            public int ProxyHashCount = 0;
            // "ip:port" strings: all candidates after construction, only the error-free ones after SaveProxy.
            public List<string> proxyResultList;
    
            private int RepeateTimes = 5;       // number of download test rounds
            private int MaxThread = 300;        // maximum degree of parallelism
            private int FileLength;             // size in bytes of the reference download
            private readonly ProxyDetail[] Proxys;      // proxies under test
            private Uri TestUri = new Uri("http://www.baidu.com/");         // page downloaded to test a proxy (default: baidu.com)
    
            // Whole-string checks for a dotted IPv4-like address and for a purely numeric port
            // (used by GetFromHeibai and GetFromProxy360).
            Regex ipPattern = new Regex(@"^\d+\.\d+\.\d+\.\d+$", RegexOptions.Compiled);
            Regex numPattern = new Regex(@"^\d+$", RegexOptions.Compiled);
            // cnproxy: letter="digit" assignments used to obfuscate ports, and ip followed by "+a+b+c" letter runs.
            Regex numCnPattern = new Regex(@"(?<word>[a-z])=""(?<num>\d)""", RegexOptions.Compiled);
            Regex ipPortPattern = new Regex(@"(?is)(?<ip>\d+\.\d+\.\d+\.\d+)[^+]*?(?<word>(\+[a-z])+)", RegexOptions.Compiled);
            // Site-specific ip/port extraction patterns; each is used by the GetFrom* method of the same name.
            Regex dailiipPattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>.*?<td>(?<port>\d+)</td>", RegexOptions.Compiled);
            Regex xkerPattern = new Regex(@"(?is)(?<ip>(\d{1,3}\.){3}\d{1,3})([:\s]|(</?div>.*?(port\d*"">)))(?<port>\d+)", RegexOptions.Compiled);
            Regex daili18Pattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td><td>(?<port>\d+)", RegexOptions.Compiled);
            Regex wl35Pattern = new Regex(@"(?is)<td[^>]*?>(?<ip>(\d{1,3}\.){3}\d{1,3})\s*</td>[^>]*>(?<port>\d+)", RegexOptions.Compiled);
            Regex proxiedPattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>\s*?<td[^>]*>(?<port>\d+)</td>", RegexOptions.Compiled);
            // nntime: letter=digit assignments and ip followed by "+a+b" letter runs ending in ')'.
            Regex numnntimePattern = new Regex(@"(?<word>[a-z])=(?<num>\d)", RegexOptions.Compiled);
            Regex nntimePattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(\+[a-z])*)\)", RegexOptions.Compiled);
            Regex realboosterPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);
            // simpleproxylist: the port is written as a run of two-digit "&#NN" character codes.
            Regex simpleproxylistPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(&#\d{2})+)</td>", RegexOptions.Compiled);
    
            /// <summary>
            /// Prepares a proxy test run: measures the size of the reference download, scrapes the
            /// proxy-list sites enabled in <paramref name="checkWeb"/> and converts every usable
            /// "ip:port" entry into a <see cref="ProxyDetail"/>.
            /// </summary>
            /// <param name="maxThread">Value assigned to MaxThread (used by Start as the degree of parallelism).</param>
            /// <param name="repeateTimes">Number of test rounds Start will run.</param>
            /// <param name="downUrl">Absolute URL downloaded through each proxy; must be a valid URI.</param>
            /// <param name="checkWeb">Flags selecting which proxy-list sites are queried.</param>
            /// <remarks>
            /// When the reference download yields no data, the static <see cref="Error"/> flag is set and
            /// the instance is left with empty (not null) proxy collections, so a later call to Start
            /// completes without testing anything instead of failing on a null array.
            /// </remarks>
            public ProxyTest(int maxThread,int repeateTimes,string downUrl,CheckProxyWeb checkWeb)
            {
                this.MaxThread = maxThread;
                this.RepeateTimes = repeateTimes;
                this.TestUri = new Uri(downUrl);

                // Safe defaults for the early-return path below.
                proxyResultList = new List<string>();
                Proxys = new ProxyDetail[0];

                // Size of the reference download; proxies must later return exactly this many bytes.
                FileLength = GetFileLength();

                if (FileLength == 0)
                {
                    Error = true;
                    return;
                }

                List<ProxyDetail> pList = new List<ProxyDetail>();

                // Candidate proxies gathered from the enabled web sites.
                foreach (string item in GetProxyFromWeb(checkWeb))
                {
                    WebProxy proxy;
                    try { proxy = new WebProxy(item); }
                    catch (UriFormatException) { continue; }   // malformed "ip:port" entry: skip it

                    ProxyDetail pDetail = new ProxyDetail();
                    pDetail.Proxy = proxy;
                    pDetail.ProxyString = item;
                    pList.Add(pDetail);
                }

                proxyResultList = pList.Select(a => a.ProxyString).ToList();
                ProxyHashCount = pList.Count;
                Proxys = pList.ToArray();           // final list of proxies to test
            }
    
            /// <summary>
            /// Downloads <c>TestUri</c> directly (no proxy configured here) and returns the payload size.
            /// </summary>
            /// <returns>Number of bytes downloaded, or 0 when the download fails for any reason.</returns>
            private int GetFileLength()
            {
                try
                {
                    // WebClient is IDisposable; release it as soon as the download has finished.
                    using (WebClient client = new WebClient())
                    {
                        return client.DownloadData(TestUri).Length;
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: the constructor treats 0 as "reference download failed".
                    return 0;
                }
            }
    
            /// <summary>
            /// Scrapes pages 1 to 10 of cnproxy.com and returns the proxies found there.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port". Pages that fail to download are skipped, so the
            /// set may be empty.
            /// </returns>
            private HashSet<string> GetFromCnproxy()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> proxyHash = new HashSet<string>();

                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;

                    for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
                    {
                        Uri uri = new Uri(string.Format("http://www.cnproxy.com/proxy{0}.html", pageIndex));

                        // Headers are (re)assigned for every page: WebClient removes restricted headers
                        // such as User-Agent, Content-Type and Referer from its collection when it builds
                        // a request, so values set once before the loop only reach the first page.
                        // Assigning (instead of Add) also guarantees a single Referer value.
                        client.Headers["User-Agent"] = userAgent;
                        client.Headers["Content-Type"] = postContentType;
                        client.Headers["Referer"] = uri.AbsoluteUri;

                        string content;
                        try
                        {
                            content = Encoding.GetEncoding("gb2312").GetString(client.DownloadData(uri));
                        }
                        catch { continue; }   // best effort: a failing page contributes nothing

                        // The site hides port digits behind JavaScript variables (letter="digit");
                        // build the letter -> digit table, first definition wins.
                        Dictionary<string, string> wordToNum = new Dictionary<string, string>();
                        foreach (Match m in numCnPattern.Matches(content))
                        {
                            string letter = m.Groups["word"].Value;
                            if (!wordToNum.ContainsKey(letter))
                                wordToNum.Add(letter, m.Groups["num"].Value);
                        }

                        // Each match is an ip followed by a "+a+b+c" run that spells the port.
                        foreach (Match m in ipPortPattern.Matches(content))
                        {
                            StringBuilder address = new StringBuilder(m.Groups["ip"].Value).Append(':');
                            string[] words = m.Groups["word"].Value.Split(new char[] { '+' }, StringSplitOptions.RemoveEmptyEntries);

                            bool decoded = true;
                            foreach (string word in words)
                            {
                                string digit;
                                if (!wordToNum.TryGetValue(word, out digit))
                                {
                                    // Unknown letter: the port cannot be reconstructed, drop this entry.
                                    decoded = false;
                                    break;
                                }
                                address.Append(digit);
                            }

                            if (decoded)
                                proxyHash.Add(address.ToString());
                        }
                    }
                }
                return proxyHash;
            }
    
            /// <summary>
            /// Scrapes up to ten list pages of heibai.net and returns the proxies found there.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port". Pages that fail to download are skipped; the scan
            /// stops at the first downloaded page that contains no <c>tr.cells</c> rows.
            /// </returns>
            private HashSet<string> GetFromHeibai()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                HtmlDocument htmlDoc = new HtmlDocument();

                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;

                    // Ten pages of proxy listings.
                    for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
                    {
                        Uri uri = new Uri(string.Format("http://www.heibai.net/proxy/index.php?act=list&port=&type=&country=&page={0}", pageIndex));

                        // Re-apply the headers for every request: WebClient drops restricted headers
                        // (User-Agent, Content-Type, Referer) from its collection when it builds a
                        // request, so setting them once before the loop only covers the first page.
                        client.Headers["User-Agent"] = userAgent;
                        client.Headers["Content-Type"] = postContentType;
                        client.Headers["Referer"] = uri.AbsoluteUri;

                        string content;
                        try
                        {
                            byte[] bytes = client.DownloadData(uri);
                            content = WebUtility.HtmlDecode(Encoding.GetEncoding("utf-8").GetString(bytes));
                        }
                        catch { continue; }   // best effort: a failing page contributes nothing

                        htmlDoc.LoadHtml(content);

                        // One table row per proxy.
                        var paraNodes = htmlDoc.DocumentNode.SelectNodes(@"//tr[@class='cells']");

                        if (paraNodes == null)
                            return ipHash;

                        HtmlDocument htmlD = new HtmlDocument();

                        foreach (var node in paraNodes)
                        {
                            // Re-parse the row on its own so the positional XPath below is row-relative.
                            htmlD.LoadHtml(node.OuterHtml);
                            var ipNode = htmlD.DocumentNode.SelectSingleNode(@"//td[2]");
                            var portNode = htmlD.DocumentNode.SelectSingleNode(@"//td[3]");

                            if (ipNode == null || portNode == null)
                                continue;

                            string ip = ipNode.InnerText.Trim();
                            string port = portNode.InnerText.Trim();

                            if (ipPattern.IsMatch(ip) && numPattern.IsMatch(port))
                                ipHash.Add(ip + ":" + port);
                        }
                    }
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the proxy360.cn default page and returns the proxies listed in its
            /// <c>div.proxylistitem</c> elements.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded
            /// or contains no list items.
            /// </returns>
            private HashSet<string> GetFromProxy360()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://www.proxy360.cn/default.aspx");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("utf-8").GetString(bytes));

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(content);

                // One div per proxy.
                var paraNodes = htmlDoc.DocumentNode.SelectNodes(@"//div[@class='proxylistitem']");

                if (paraNodes == null)
                    return ipHash;

                HtmlDocument htmlD = new HtmlDocument();

                foreach (var node in paraNodes)
                {
                    // Re-parse the item on its own so the positional XPath below is item-relative.
                    htmlD.LoadHtml(node.OuterHtml);
                    var ipNode = htmlD.DocumentNode.SelectSingleNode(@"//span[@class='tbBottomLine'][1]");
                    var portNode = htmlD.DocumentNode.SelectSingleNode(@"//span[@class='tbBottomLine'][2]");

                    if (ipNode == null || portNode == null)
                        continue;

                    string ip = ipNode.InnerText.Trim();
                    string port = portNode.InnerText.Trim();

                    if (ipPattern.IsMatch(ip) && numPattern.IsMatch(port))
                        ipHash.Add(ip + ":" + port);
                }

                return ipHash;
            }
    
            /// <summary>
            /// Downloads the dailiip.com front page and extracts every ip/port pair matched by
            /// <c>dailiipPattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private HashSet<string> GetFromDailiip()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://www.dailiip.com/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The page is decoded as gb2312 and HTML entities are resolved before matching.
                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
                foreach (Match m in dailiipPattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the xker.com proxy page and extracts every ip/port pair matched by
            /// <c>xkerPattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private HashSet<string> GetFromXker()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://www.xker.com/ip/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The page is decoded as gb2312 and HTML entities are resolved before matching.
                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
                foreach (Match m in xkerPattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the 18daili.com listing endpoint and extracts every ip/port pair matched by
            /// <c>daili18Pattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private HashSet<string> GetFrom18daili()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://www.18daili.com/SearchLocationForAjax.php");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The response is decoded as UTF-8 and HTML entities are resolved before matching.
                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
                foreach (Match m in daili18Pattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the 35wl.com proxy page and extracts every ip/port pair matched by
            /// <c>wl35Pattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private HashSet<string> GetFrom35wl()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://www.35wl.com/tools/dlfwq.htm");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The page is decoded as gb2312 and HTML entities are resolved before matching.
                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
                foreach (Match m in wl35Pattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the four 51proxied.com list pages (fast, anonymous, non-anonymous, socks5) and
            /// extracts every ip/port pair matched by <c>proxiedPattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port". Pages that fail to download are skipped, so the
            /// set may be empty.
            /// </returns>
            private HashSet<string> GetFrom51proxied()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();

                string[] pages =
                {
                    "http://www.51proxied.com/http_fast.html",
                    "http://www.51proxied.com/http_anonymous.html",
                    "http://www.51proxied.com/http_non_anonymous.html",
                    "http://www.51proxied.com/socks5.html"
                };

                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;

                    foreach (string page in pages)
                    {
                        Uri uri = new Uri(page);

                        // Re-apply the headers for every request: WebClient drops restricted headers
                        // (User-Agent, Content-Type, Referer) from its collection when it builds a
                        // request, so setting them once up front only covers the first page.
                        client.Headers["User-Agent"] = userAgent;
                        client.Headers["Content-Type"] = postContentType;
                        client.Headers["Referer"] = uri.AbsoluteUri;

                        byte[] bytes;
                        try { bytes = client.DownloadData(uri); }
                        catch { continue; }   // best effort: a failing page contributes nothing

                        string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
                        foreach (Match m in proxiedPattern.Matches(content))
                        {
                            ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                        }
                    }
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the nntime.com front page and returns the proxies listed there. Ports on this
            /// site are written as "+a+b+c" letter runs, resolved through letter=digit assignments
            /// found elsewhere in the same page.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// Entries whose port cannot be reconstructed (unknown letter, or no letters at all) are
            /// omitted.
            /// </returns>
            private HashSet<string> GetFromnntime()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://nntime.com/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("iso-8859-1").GetString(bytes));

                // Letter -> digit table; the first definition of a letter wins.
                Dictionary<string, string> charDic = new Dictionary<string, string>();
                foreach (Match m in numnntimePattern.Matches(content))
                {
                    string letter = m.Groups["word"].Value;
                    if (!charDic.ContainsKey(letter))
                        charDic.Add(letter, m.Groups["num"].Value);
                }

                foreach (Match m in nntimePattern.Matches(content))
                {
                    string[] words = m.Groups["word"].Value.Split(new char[] { '+' }, StringSplitOptions.RemoveEmptyEntries);

                    // The pattern allows an empty letter run; an address without a port is not a
                    // usable proxy entry, so skip it instead of emitting "ip:".
                    if (words.Length == 0)
                        continue;

                    StringBuilder address = new StringBuilder(m.Groups["ip"].Value).Append(':');
                    bool decoded = true;
                    foreach (string word in words)
                    {
                        string digit;
                        if (!charDic.TryGetValue(word, out digit))
                        {
                            // Unknown letter: the port cannot be reconstructed, drop this entry.
                            decoded = false;
                            break;
                        }
                        address.Append(digit);
                    }

                    if (decoded)
                        ipHash.Add(address.ToString());
                }
                return ipHash;
            }
    
            /// <summary>
            /// Downloads the realbooster.com free proxy list page and extracts every "ip:port" cell
            /// matched by <c>realboosterPattern</c>.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private HashSet<string> GetFromrealbooster()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://realbooster.com/seo-services/free-proxy-list-tool/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));

                // Reading match groups and adding to a HashSet cannot fail, so no try/catch is needed.
                foreach (Match m in realboosterPattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }
                return ipHash;
            }
    
            /// <summary>
            /// Scrapes result pages 1 to 19 of the simpleproxylist.com search for country CN. Ports on
            /// this site are written as runs of two-digit "&amp;#NN" character codes, decoded here
            /// into the characters they denote.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port". Pages that fail to download are skipped, so the
            /// set may be empty.
            /// </returns>
            private HashSet<string> GetFromsimpleproxylist()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();

                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;

                    for (int i = 1; i < 20; ++i)
                    {
                        Uri uri = new Uri("http://simpleproxylist.com/search.php?p=" + i.ToString() + "&country=CN");

                        // Re-apply the headers for every request: WebClient drops restricted headers
                        // (User-Agent, Content-Type, Referer) from its collection when it builds a
                        // request, so setting them once before the loop only covers the first page.
                        client.Headers["User-Agent"] = userAgent;
                        client.Headers["Content-Type"] = postContentType;
                        client.Headers["Referer"] = uri.AbsoluteUri;

                        byte[] bytes;
                        try { bytes = client.DownloadData(uri); }
                        catch { continue; }   // best effort: a failing page contributes nothing

                        string content = HttpUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));

                        foreach (Match m in simpleproxylistPattern.Matches(content))
                        {
                            StringBuilder address = new StringBuilder(m.Groups["ip"].Value).Append(':');
                            string[] codes = m.Groups["word"].Value.Split(new string[] { "&#" }, StringSplitOptions.RemoveEmptyEntries);

                            bool decoded = true;
                            foreach (string code in codes)
                            {
                                int charCode;
                                if (!int.TryParse(code, out charCode))
                                {
                                    // Not a parsable number: drop the whole entry, as before.
                                    decoded = false;
                                    break;
                                }
                                address.Append((char)charCode);
                            }

                            if (decoded)
                                ipHash.Add(address.ToString());
                        }
                    }
                }
                return ipHash;
            }
    
            // Matches an "ip:port" table cell on proxy-ip-list.com. Kept in a static field so the
            // compiled expression is built once instead of on every call.
            private static readonly Regex ProxyIpListCellPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);

            /// <summary>
            /// Downloads the proxy-ip-list.com front page and extracts every "ip:port" table cell.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private static HashSet<string> GetFromproxyiplist()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://proxy-ip-list.com/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The markup is HTML-decoded twice, exactly as the original implementation did.
                string content = HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes)));

                // Reading match groups and adding to a HashSet cannot fail, so no try/catch is needed.
                foreach (Match m in ProxyIpListCellPattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }

                return ipHash;
            }
    
            // Matches any "ip:port" occurrence on anonymous-proxy-list.webs.com. Kept in a static
            // field so the compiled expression is built once instead of on every call.
            private static readonly Regex WebsAddressPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)", RegexOptions.Compiled);

            /// <summary>
            /// Downloads anonymous-proxy-list.webs.com and extracts every "ip:port" occurrence.
            /// </summary>
            /// <returns>
            /// Distinct proxies formatted as "ip:port"; an empty set when the page cannot be downloaded.
            /// </returns>
            private static HashSet<string> GetFromWebs()
            {
                const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
                const string postContentType = "application/x-www-form-urlencoded";

                HashSet<string> ipHash = new HashSet<string>();
                Uri uri = new Uri("http://anonymous-proxy-list.webs.com/");

                byte[] bytes;
                // WebClient is IDisposable; it is only needed for the single download.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", userAgent);
                    client.Headers.Add("Content-Type", postContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);

                    // Best effort: any download failure yields no proxies from this site.
                    try { bytes = client.DownloadData(uri); }
                    catch { return ipHash; }
                }

                // The markup is HTML-decoded twice, exactly as the original implementation did.
                string content = HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(Encoding.GetEncoding("ISO-8859-1").GetString(bytes)));

                // Reading match groups and adding to a HashSet cannot fail, so no try/catch is needed.
                foreach (Match m in WebsAddressPattern.Matches(content))
                {
                    ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
                }

                return ipHash;
            }
    
            /// <summary>
            /// Queries every proxy-list site enabled in <paramref name="checkWeb"/>, in a fixed order,
            /// and merges the results into one set.
            /// </summary>
            /// <param name="checkWeb">One boolean flag per supported site.</param>
            /// <returns>The union of all "ip:port" strings returned by the enabled sites.</returns>
            /// <remarks>
            /// GetProxyTotal grows by the size of each site's own result, so an address reported by
            /// two sites is counted twice there but stored once in the returned set.
            /// </remarks>
            private HashSet<string> GetProxyFromWeb(CheckProxyWeb checkWeb)
            {
                HashSet<string> merged = new HashSet<string>();

                // Runs one site scraper when its flag is set and folds its result into the totals.
                Action<bool, Func<HashSet<string>>> collect = (enabled, fetch) =>
                {
                    if (!enabled)
                        return;

                    HashSet<string> found = fetch();
                    GetProxyTotal += found.Count;
                    merged.UnionWith(found);
                };

                collect(checkWeb.CnProxy, GetFromCnproxy);                  // cnproxy
                collect(checkWeb.Heibai, GetFromHeibai);                    // heibai
                collect(checkWeb.Proxy360, GetFromProxy360);                // proxy360
                collect(checkWeb.Dailiip, GetFromDailiip);                  // dailiip
                collect(checkWeb.Xker, GetFromXker);                        // xker
                collect(checkWeb.Daili18, GetFrom18daili);                  // 18daili
                collect(checkWeb.Wl35, GetFrom35wl);                        // 35wl
                collect(checkWeb.Proxied51, GetFrom51proxied);              // 51proxied
                collect(checkWeb.Nntime, GetFromnntime);                    // nntime
                collect(checkWeb.Realbooster, GetFromrealbooster);          // realbooster
                collect(checkWeb.Simpleproxylist, GetFromsimpleproxylist);  // simpleproxylist
                collect(checkWeb.Proxyiplist, GetFromproxyiplist);          // proxyiplist
                collect(checkWeb.Webs, GetFromWebs);                        // webs

                return merged;
            }
    
            /// <summary>
            /// Tests every collected proxy by downloading <c>TestUri</c> through it, repeating the whole
            /// pass <c>RepeateTimes</c> times, then stores the surviving proxies via SaveProxy.
            /// </summary>
            /// <param name="help">Receives one log line per attempt and a final completion message.</param>
            /// <remarks>
            /// A proxy is marked failed (its Error is set) when the download throws or when the number
            /// of bytes received differs from the reference size; failed proxies are skipped in later
            /// rounds. NOTE(review): <c>help.AddText</c> is called concurrently from the parallel
            /// loop — confirm that ProxyHelp is safe for that.
            /// </remarks>
            public void Start(ProxyHelp help)
            {
                ParallelOptions taskParallet = new ParallelOptions();
                taskParallet.MaxDegreeOfParallelism = MaxThread;
                int count = 0;   // attempts completed so far, across all rounds

                for (int i = 0; i < RepeateTimes; i++)
                {
                    Parallel.ForEach(Proxys, taskParallet, current =>
                    {
                        // Already failed in an earlier round: do not test again.
                        if (current.Error != null)
                            return;

                        // One WebClient per attempt; dispose it so repeated rounds over many proxies
                        // do not accumulate undisposed clients.
                        using (WebClient wc = new WebClient())
                        {
                            InitWebClient(wc, TestUri, TestUri);
                            wc.Proxy = current.Proxy;

                            Stopwatch sw = Stopwatch.StartNew();
                            try
                            {
                                byte[] data = wc.DownloadData(TestUri);
                                sw.Stop();
                                current.Milliseconds = sw.ElapsedMilliseconds;
                                current.ContentLength = data.Length;

                                // A response of a different size than the direct download is treated
                                // as a broken or tampering proxy.
                                if (current.ContentLength != FileLength)
                                    current.Error = new Exception("下载错误");

                                string log = string.Format("{0} - {1} - {2} - {3}", current.ProxyString, current.ContentLength, current.Milliseconds, count);
                                help.AddText(log);
                            }
                            catch (Exception e)
                            {
                                current.Error = e;
                                string log = string.Format("{0} - {1} - {2}", current.ProxyString, e.Message, count);
                                help.AddText(log);
                            }
                        }

                        Interlocked.Increment(ref count);
                    });

                    // Order by measured time, slowest first, before the next round.
                    Array.Sort(Proxys, new Comparison<ProxyDetail>((a, b) => b.Milliseconds.CompareTo(a.Milliseconds)));
                }

                SaveProxy();
                help.AddText("测试代理地址结束!");
                Console.WriteLine("结束");
            }
    
            //保存代理地址
            /// <summary>
            /// Stores in <c>proxyResultList</c> the <c>ProxyString</c> of every entry in <c>Proxys</c>
            /// whose <c>Error</c> is still null.
            /// </summary>
            private void SaveProxy()
            {
                // Author's notes on stricter selections that could be applied here instead:
                // sort ascending by Milliseconds and keep the first N entries, or keep only
                // error-free proxies whose Milliseconds is below 10000.
                IEnumerable<string> workingAddresses =
                    from detail in Proxys
                    where detail.Error == null
                    select detail.ProxyString;

                proxyResultList = workingAddresses.ToList();
            }
    
            // User-Agent header value (an MSIE 8.0 browser string) that InitWebClient adds to the WebClient it configures.
            private const string DefaultUserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; CIBA; .NET4.0C; .NET4.0E)";
            // Content-Type header value that InitWebClient adds to the WebClient it configures.
            private const string DefaultPostContentType = "application/x-www-form-urlencoded";
    
            //初始化WebClient
            /// <summary>
            /// Configures a <see cref="WebClient"/> before a request: assigns the default credentials,
            /// adds the User-Agent, Content-Type and Referer headers, and switches off
            /// Expect100Continue on the service point found for <paramref name="uri"/>.
            /// </summary>
            /// <param name="webClient">Client to configure.</param>
            /// <param name="uri">Address used to look up the service point.</param>
            /// <param name="pUri">Address whose absolute form is sent as the Referer header.</param>
            private static void InitWebClient(WebClient webClient, Uri uri, Uri pUri)
            {
                // NOTE(review): DefaultCredentials are the current user's credentials; confirm that
                // offering them is intended when the request goes through third-party proxies.
                webClient.Credentials = CredentialCache.DefaultCredentials;

                WebHeaderCollection requestHeaders = webClient.Headers;
                requestHeaders.Add("User-Agent", DefaultUserAgent);
                requestHeaders.Add("Content-Type", DefaultPostContentType);
                requestHeaders.Add("Referer", pUri.AbsoluteUri);

                // Only touch the setting when it is currently enabled.
                ServicePoint targetPoint = ServicePointManager.FindServicePoint(uri);
                if (targetPoint.Expect100Continue)
                {
                    targetPoint.Expect100Continue = false;
                }
            }
    
        }
    
        /// <summary>
        /// Per-proxy state used while testing proxies.
        /// </summary>
        public class ProxyDetail
        {
            /// <summary>Proxy object assigned to the WebClient for the test download.</summary>
            public WebProxy Proxy;
            /// <summary>Text form of the proxy; written to the test log and returned in the result list.</summary>
            public string ProxyString;
            /// <summary>Byte count of the most recent test download that returned data.</summary>
            public int ContentLength;
            /// <summary>Elapsed time, in milliseconds, of the most recent test download that returned data.</summary>
            public long Milliseconds;
            /// <summary>Null while the proxy is still considered usable; set when a test download throws or returns an unexpected length.</summary>
            public Exception Error;
        }
    
        /// <summary>
        /// Set of on/off switches, one per proxy-list source, that selects which sources are queried
        /// when proxies are collected.
        /// </summary>
        /// <remarks>
        /// Only <c>Proxyiplist</c> and <c>Webs</c> are used in the visible part of the collector;
        /// the remaining flags presumably gate the source of the same name in the same way (TODO confirm).
        /// </remarks>
        public class CheckProxyWeb
        {
            public bool CnProxy;
            public bool Heibai;
            public bool Proxy360;
            public bool Dailiip;
            public bool Xker;
            public bool Daili18;
            public bool Wl35;
            public bool Proxied51;
            public bool Nntime;
            public bool Realbooster;
            public bool Simpleproxylist;
            // When true, the results of GetFromproxyiplist() are added to the collected proxies.
            public bool Proxyiplist;
            // When true, the results of GetFromWebs() are added to the collected proxies.
            public bool Webs;
        }
    }

    其中调用的代码就很简单了

    由于我是隔段时间获取一次,所以用到了Timer的内容,也贴在这里了

    View Code
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Runtime.InteropServices;
    using System.Threading;
    using System.IO;

    namespace ProxyTest
    {
    /// <summary>
    /// Console host that runs the proxy test from a timer: the first run fires one minute after
    /// start-up, and every run that actually starts re-arms the timer to a 24-hour interval.
    /// </summary>
    class Program
    {
        // Log file that Write appends to; the path is relative to the working directory.
        const string LogFile = @"log/proxyLog.txt";

        // Created once in Main.
        static System.Timers.Timer proxyTimer;

        // 0 = idle, 1 = a proxy test is in progress. Elapsed handlers can run on different
        // threads, so the flag is claimed and released with Interlocked.
        static int proxyIsRunning = 0;

        /// <summary>
        /// Arms the timer and then blocks the main thread forever so the process stays alive.
        /// </summary>
        static void Main(string[] args)
        {
            proxyTimer = new System.Timers.Timer();
            proxyTimer.Interval = 1000 * 60; // first run after one minute
            proxyTimer.Elapsed += new System.Timers.ElapsedEventHandler(DoProxyTest);
            proxyTimer.Start();

            Thread.Sleep(Timeout.Infinite);
        }

        /// <summary>
        /// Timer callback: runs one proxy test unless another one is still in progress,
        /// logging the ProxyTable row count before and after.
        /// </summary>
        static void DoProxyTest(object sender, System.Timers.ElapsedEventArgs e)
        {
            Write("Start to running ProxyTest function!");

            // Claim the flag atomically; if it was already taken, a previous run has not finished.
            if (Interlocked.CompareExchange(ref proxyIsRunning, 1, 0) != 0)
            {
                Write("End ProxyTest function! by : ProxyTest is running now!" + Environment.NewLine);
                return;
            }

            try
            {
                proxyTimer.Interval = 1000 * 60 * 60 * 24; // from now on, run once a day

                int rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
                Write("ProxyTable Count:" + rowCount);

                // NOTE(review): the ProxyTest listing earlier in this article declares
                // Start(ProxyHelp) in namespace ProxyTools; this call targets an older version.
                Pmars.ProxyTest proxy = new Pmars.ProxyTest();
                proxy.Start();

                rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
                Write("ProxyTable Count:" + rowCount);

                Write("End ProxyTest function!" + Environment.NewLine);
            }
            catch (Exception ex)
            {
                // The timer may suppress exceptions thrown by Elapsed handlers, so record the
                // failure in the log before letting it propagate.
                Write("ProxyTest function failed: " + ex + Environment.NewLine);
                throw;
            }
            finally
            {
                // Always release the flag, otherwise a single failure would block every later run.
                Interlocked.Exchange(ref proxyIsRunning, 0);
            }
        }

        /// <summary>
        /// Appends one timestamped line to the log file, creating the log folder if needed.
        /// </summary>
        static void Write(string contents)
        {
            // AppendAllText creates the file but not its folder.
            string folder = Path.GetDirectoryName(LogFile);
            if (!string.IsNullOrEmpty(folder))
            {
                Directory.CreateDirectory(folder);
            }

            File.AppendAllText(LogFile, DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " : " + contents + Environment.NewLine);
        }
    }
    }

    第一部分的代码后来改动了很多(例如 Start 现在需要传入一个 ProxyHelp 参数),已经用不到下面 timer 部分的代码了;不过这段 timer 代码我就不删了,稍微改一下其实还是可以用的!

    贴在这里,学习一下!




  • 相关阅读:
    Handler机制来处理子线程去更新UI线程控件
    获得某月份的天数
    listview选中没有效果
    kali或其他系统,虚拟机中不能加载镜像
    tomcat开启多个端口
    kali自定义分辨率
    Redis 安装手册
    bash检查centos服务器运行状态
    关于利用RD client远程电脑,和输入法的一些问题
    centOS下 MYSQL基本操作
  • 原文地址:https://www.cnblogs.com/pmars/p/2327877.html
Copyright © 2011-2022 走看看