zoukankan      html  css  js  c++  java
  • 自动获取代理IP信息的例子(含代码,欢迎分享)

    		/// <summary>
    		/// Downloads the body of <paramref name="url"/> with an HTTP GET request and returns it as text.
    		/// </summary>
    		/// <remarks>
    		/// The request goes through the <c>proxy</c> member (declared outside this method) and carries
    		/// browser-like Accept, Accept-Language and User-Agent headers. The downloaded text is also
    		/// written to the debug output, framed by two separator lines.
    		/// </remarks>
    		/// <param name="url">Address of the page to download.</param>
    		/// <returns>The response body as text.</returns>
    		public string HttpGetText(string url)
    		{
    			HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
    			request.Method = "GET";
    			request.ContentType = @"application/x-www-form-urlencoded";
    			request.Accept = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    			// Only the header VALUE belongs here; the header name is supplied by the enum.
    			request.Headers.Add(HttpRequestHeader.AcceptLanguage, @"zh-CN,zh;q=0.8");
    			request.UserAgent = @"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0";
    			request.Proxy = proxy;

    			// using-blocks release the connection, stream and reader even if reading throws.
    			string text;
    			using (var response = request.GetResponse())
    			using (var stream = response.GetResponseStream())
    			using (var reader = new StreamReader(stream))
    			{
    				text = reader.ReadToEnd();
    			}

    			string separator = new string('=', 20);
    			System.Diagnostics.Debug.WriteLine(separator);
    			System.Diagnostics.Debug.WriteLine(text);
    			System.Diagnostics.Debug.WriteLine(separator);
    			return text;
    		}
    
    /// <summary>
    /// Container for the proxy entries collected from the proxy list page.
    /// </summary>
    public class IPs
    {
    	/// <summary>
    	/// One proxy server entry.
    	/// </summary>
    	public class proxy
    	{
    		/// <summary>IP address text of the proxy.</summary>
    		public string ip;

    		/// <summary>Port number of the proxy.</summary>
    		public int port;

    		/// <summary>Address text associated with the proxy.</summary>
    		public string address;

    		/// <summary>Speed value of the proxy.</summary>
    		public int speed;

    		/// <summary>Lifetime of the proxy, in minutes.</summary>
    		public int life;

    		/// <summary>Time at which the proxy was last checked.</summary>
    		public DateTime check_time;
    	}

    	/// <summary>
    	/// All proxy entries collected so far; starts out empty.
    	/// </summary>
    	public List<proxy> items = new List<proxy>();
    }
    		/// <summary>
    		/// Downloads the proxy list page, extracts one proxy entry per table row and shows/stores the result.
    		/// </summary>
    		/// <remarks>
    		/// Every row whose fields convert cleanly is added to <c>listView1</c> and to <c>IPaddress.items</c>.
    		/// Rows that fail conversion are reported through <c>LogAdd</c> and skipped.
    		/// </remarks>
    		/// <param name="sender">Event source (not used).</param>
    		/// <param name="e">Event data (not used).</param>
    		private void button1_Click(object sender, EventArgs e)
    		{
    			const string tableOpen = "<table id=\"ip_list\">";
    			const string tableClose = "</table>";

    			var html = HttpGetText("http://www.xicidaili.com/nt");

    			// Look for the closing tag only AFTER the opening tag, and bail out when either marker
    			// is missing instead of letting Substring throw on a negative index.
    			int start = html.IndexOf(tableOpen, StringComparison.Ordinal);
    			int end = start < 0 ? -1 : html.IndexOf(tableClose, start, StringComparison.Ordinal);
    			if (end < 0)
    			{
    				LogAdd("未找到代理IP列表表格");
    				return;
    			}
    			string ip_list = html.Substring(start, end - start + tableClose.Length);

    			// One match per <tr>. "\s*?" skips whitespace between tags; the speed group takes only the
    			// digits in front of the first '%' so that it can be converted to an int.
    			var find = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?(?<speed>\d+)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    			var ips = find.Matches(ip_list);

    			listView1.BeginUpdate();
    			try
    			{
    				foreach (System.Text.RegularExpressions.Match item in ips)
    				{
    					try
    					{
    						string ipText = item.Groups["ip"].Value;
    						string portText = item.Groups["port"].Value;
    						string addressText = item.Groups["address"].Value;
    						string speedText = item.Groups["speed"].Value;
    						string lifeText = item.Groups["life"].Value;
    						string checkTimeText = item.Groups["check_time"].Value;

    						// Convert everything first; a bad field throws before anything is added.
    						var ip = new IPs.proxy();
    						ip.ip = ipText;
    						ip.port = Convert.ToInt32(portText);
    						ip.address = addressText;
    						ip.speed = Convert.ToInt32(speedText);
    						ip.life = ParseLifeMinutes(lifeText);
    						// NOTE(review): parsed with the current culture - confirm it matches the page's date format.
    						ip.check_time = Convert.ToDateTime(checkTimeText);

    						ListViewItem lvi = new ListViewItem(ipText);
    						lvi.SubItems.Add(portText);
    						lvi.SubItems.Add(addressText);
    						lvi.SubItems.Add(speedText);
    						lvi.SubItems.Add(lifeText);
    						lvi.SubItems.Add(checkTimeText);

    						listView1.Items.Add(lvi);
    						IPaddress.items.Add(ip);
    					}
    					catch (Exception)
    					{
    						// Best effort: report the row that could not be converted and keep going.
    						LogAdd("转换IP地址信息出错 " + item.Value);
    					}
    				}
    			}
    			finally
    			{
    				// Always re-enable redrawing, even if something unexpected escapes the loop.
    				listView1.EndUpdate();
    			}

    			// Converts a lifetime such as "3天", "5小时" or "12分钟" into minutes.
    			// Text without a recognised unit is taken as a number of minutes.
    			int ParseLifeMinutes(string life)
    			{
    				int minutesPerUnit = 1;
    				if (life.Contains("天"))
    				{
    					minutesPerUnit = 60 * 24;
    					life = life.Replace("天", "");
    				}
    				else if (life.Contains("分钟"))
    				{
    					minutesPerUnit = 1;
    					life = life.Replace("分钟", "");
    				}
    				else if (life.Contains("小时"))
    				{
    					minutesPerUnit = 60;
    					life = life.Replace("小时", "");
    				}
    				return Convert.ToInt32(life) * minutesPerUnit;
    			}
    		}
    

      

    关键代码就是获取指定网页里的IP代理信息,然后用正则表达式提取出来

    本来想直接将 HTML 转换为 XML 再解析,谁知该网页写得不标准,转换不成功。

    只有用正则来查找了,效果不错~

    代码运行环境: vs2017

    当然老版本也可以,将局部函数代码放到外部即可。

    效果图:

    关键代码部分:

    // Download the page and cut out the <table id="ip_list"> ... </table> fragment.
    var html = HttpGetText("http://www.xicidaili.com/nt");
    int i1 = html.IndexOf("<table id=\"ip_list\">");
    // Search for the closing tag after the opening tag; -1 means a marker is missing.
    int i2 = i1 < 0 ? -1 : html.IndexOf("</table>", i1);
    if (i2 < 0)
    {
        throw new InvalidOperationException("ip_list table not found in the downloaded page");
    }
    string ip_list = html.Substring(i1, i2 - i1 + "</table>".Length);
    // One match per <tr>: "\s*?" skips whitespace between tags, and the speed group
    // takes only the digits in front of the first '%'.
    var find = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?(?<speed>\d+)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    var ips = find.Matches(ip_list);
    

      正则表达式推荐一个网址及学习工具:

    http://deerchao.net/tutorials/regex/regex.htm#charclass

    我本人也记不住正则表达式,需要用的时候现查。

  • 相关阅读:
    XML 2—— XML文档的元素、属性、实体
    XML 1—— 概述
    Eclipse文件 常用快捷键
    JDBC 7—— PreparedStatement优势
    JDBC 6—— 针对不同表的通用查询操作
    JDBC 5—— 查的操作
    模板复习题目
    第四次博客作业
    oo第三次博客作业
    oo第二次博客作业
  • 原文地址:https://www.cnblogs.com/fxyc87/p/6738425.html
Copyright © 2011-2022 走看看