zoukankan      html  css  js  c++  java
  • 网页登录,网页采集基础类

    昨天在博客园上看到了一篇关于秒杀的文章,它通过模拟网页数据传送的方式来实现更快的网页访问操作。模拟网页访问在很多时候还是蛮有用的,比如自动登录、网页采集等等。下面这个类中的几个方法就是我常用到的,虽然方法不多,但基本的需求还是能满足的。

        /// <summary>
        /// Helpers for simulating browser page requests (automatic login, page scraping):
        /// fetching a response stream or page text, extracting delimited fragments from
        /// HTML, and percent-encoding text.
        /// </summary>
        public static class HtmlHelper
        {
            // Headers sent with every request so that it looks like it came from a browser.
            private const string FormContentType = "application/x-www-form-urlencoded";
            private const string BrowserAccept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
            private const string BrowserUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.9 Safari/533.2 ChromePlus/1.3.9.0";

            /// <summary>
            /// Creates an HTTP request for <paramref name="uri"/> carrying the browser-like headers.
            /// </summary>
            /// <param name="uri">Address to request.</param>
            /// <param name="cc">Cookie container to attach; ignored when null.</param>
            /// <returns>The configured request (not yet sent).</returns>
            private static HttpWebRequest CreateBrowserRequest(string uri, CookieContainer cc)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
                if (cc != null)
                {
                    request.CookieContainer = cc;
                }
                request.ContentType = FormContentType;
                request.Accept = BrowserAccept;
                request.UserAgent = BrowserUserAgent;
                return request;
            }

            /// <summary>
            /// Requests <paramref name="uri"/> and returns the raw response body stream.
            /// </summary>
            /// <param name="uri">Address to request.</param>
            /// <param name="cc">Cookie container; may be null.</param>
            /// <returns>
            /// The response stream, which the caller is responsible for disposing;
            /// null when any error occurs (the error text is shown in a message box).
            /// </returns>
            public static Stream GetBaseStream(string uri, CookieContainer cc)
            {
                try
                {
                    HttpWebRequest request = CreateBrowserRequest(uri, cc);
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    try
                    {
                        return response.GetResponseStream();
                    }
                    catch
                    {
                        // The stream never reached the caller, so nobody else can release the response.
                        response.Close();
                        throw;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show(@"操作失败:" + ex.Message);
                    return null;
                }
            }

            /// <summary>
            /// Downloads a page as text, optionally sending form data first.
            /// </summary>
            /// <param name="uri">Address to request.</param>
            /// <param name="postDate">
            /// Form data to send as the request body; when null no body is written and the
            /// request method is left at its default. The body is always encoded as UTF-8.
            /// </param>
            /// <param name="cc">Cookie container; may be null.</param>
            /// <param name="encoding">Encoding used to decode the response body.</param>
            /// <returns>
            /// The page text; null when any error occurs (the error text is shown in a message box).
            /// </returns>
            public static string GetHtmlString(string uri, string postDate, CookieContainer cc, Encoding encoding)
            {
                try
                {
                    HttpWebRequest request = CreateBrowserRequest(uri, cc);
                    request.AllowAutoRedirect = true;

                    if (postDate != null)
                    {
                        // HTTP method tokens are case-sensitive; use the standard upper-case form.
                        request.Method = "POST";
                        byte[] payload = Encoding.UTF8.GetBytes(postDate);
                        request.ContentLength = payload.Length;
                        using (Stream body = request.GetRequestStream())
                        {
                            body.Write(payload, 0, payload.Length);
                        }
                    }

                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream responseStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(responseStream, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show(@"发生错误:" + ex.Message);
                    return null;
                }
            }

            /// <summary>
            /// Returns every fragment of <paramref name="html"/> found between
            /// <paramref name="start"/> and <paramref name="end"/> (shortest match, may span lines).
            /// </summary>
            /// <param name="start">
            /// Opening delimiter. It is inserted into a regular expression unescaped, so regex
            /// metacharacters in it are interpreted; pass it through Regex.Escape for a literal match.
            /// </param>
            /// <param name="end">Closing delimiter; treated the same way as <paramref name="start"/>.</param>
            /// <param name="html">Text to search.</param>
            /// <returns>
            /// The text between each start/end pair, in document order. The list is empty when
            /// nothing matches, when <paramref name="html"/> is null, or when the delimiters do
            /// not form a valid regular expression.
            /// </returns>
            public static List<string> GetStrings(string start, string end, string html)
            {
                List<string> list = new List<string>();
                if (html == null)
                {
                    return list;
                }

                try
                {
                    // [\s\S] matches any character including line breaks, without the
                    // per-character alternation and capture of "(.|[\r\n])".
                    string pattern = string.Format("{0}(?<g>[\\s\\S]+?){1}", start, end);
                    foreach (Match match in Regex.Matches(html, pattern))
                    {
                        list.Add(match.Groups["g"].Value);
                    }
                }
                catch (ArgumentException)
                {
                    // The delimiters produced an invalid pattern: best effort, report no matches.
                }
                return list;
            }

            /// <summary>
            /// Percent-encodes every byte of <paramref name="buffer"/> using Encoding.Default.
            /// </summary>
            /// <param name="buffer">Text to encode (for example Chinese text for a URL).</param>
            /// <returns>The text as a sequence of "%xx" lower-case hex byte escapes.</returns>
            /// <remarks>
            /// NOTE(review): Encoding.Default is reported to differ between runtimes (system ANSI
            /// code page on .NET Framework, UTF-8 on .NET Core and later); use the overload taking
            /// an Encoding when the target site needs a specific one.
            /// </remarks>
            public static string UrlEncode(string buffer)
            {
                return UrlEncode(buffer, Encoding.Default);
            }

            /// <summary>
            /// Percent-encodes every byte of <paramref name="buffer"/> as produced by
            /// <paramref name="encoding"/>. All bytes are escaped, including ASCII letters and digits.
            /// </summary>
            /// <param name="buffer">Text to encode.</param>
            /// <param name="encoding">Encoding that turns the text into bytes.</param>
            /// <returns>The text as a sequence of "%xx" lower-case hex byte escapes.</returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="buffer"/> or <paramref name="encoding"/> is null.
            /// </exception>
            public static string UrlEncode(string buffer, Encoding encoding)
            {
                if (buffer == null)
                {
                    throw new ArgumentNullException("buffer");
                }
                if (encoding == null)
                {
                    throw new ArgumentNullException("encoding");
                }

                byte[] bytes = encoding.GetBytes(buffer);
                // Each byte becomes exactly three characters: '%' plus two hex digits.
                StringBuilder builder = new StringBuilder(bytes.Length * 3);
                foreach (byte b in bytes)
                {
                    builder.Append('%');
                    builder.Append(b.ToString("x2"));
                }
                return builder.ToString();
            }
        }
  • 相关阅读:
    A+B Problem
    迭代平方根
    猴子报数
    分数求和
    猴子吃桃
    钻石
    杨辉三角形
    MYSQL 5.7 修改密码、登录问题
    SQL 语句 explain 分析
    重构CMDB,避免运维之耻
  • 原文地址:https://www.cnblogs.com/qianlifeng/p/1821399.html
Copyright © 2011-2022 走看看