zoukankan      html  css  js  c++  java
  • 利用HttpWebRequest抓取网页数据

    根据运单号来获取运单状态 以EMS快递为例:

    第一步首先分析要抓取网站的入口及参数 http://www.ems.com.cn

    第二步:利用 HttpWebRequest 模拟浏览器请求来获取网页内容

    上图可以看到 获取验证码请求

    http://www.ems.com.cn/ems/rand

    查询

    可以看到 发送的 post请求 http://www.ems.com.cn/ems/order/singleQuery_t

     /// <summary>
     /// Downloads the EMS captcha image, remembers the session cookie parts and
     /// stores the recognised digits in <c>result</c>.
     /// </summary>
     /// <remarks>
     /// Recognition is not reliable, so the download is repeated in a loop. The shared
     /// counter <c>n</c> counts failed attempts (network error, missing cookie,
     /// undecodable image, or fewer than 6 recognised digits). After the fourth
     /// failure the method gives up and leaves <c>result</c> shorter than 6 characters,
     /// so the caller can skip to the next waybill. <c>n</c> is always reset to 0 on exit.
     /// The previous implementation retried by recursion, continued with a possibly
     /// null error response after the recursive call returned, and had no upper bound
     /// on retries caused by short recognition results.
     /// </remarks>
     public void getcode()
     {
         try
         {
             while (true)
             {
                 string recognized = TryRecognizeCaptcha();
                 if (recognized != null && recognized.Length >= 6)
                 {
                     result = recognized;
                     return;
                 }

                 // Same limit as the original "n <= 3" check: three retries, then give up.
                 n++;
                 if (n > 3)
                 {
                     result = recognized ?? "";
                     return;
                 }
             }
         }
         finally
         {
             n = 0;
         }
     }

     /// <summary>
     /// Performs one captcha download and recognition attempt.
     /// </summary>
     /// <returns>
     /// The recognised digits (possibly fewer than 6), or null when the request failed,
     /// the Set-Cookie header was unusable, or the body was not a decodable image.
     /// </returns>
     private string TryRecognizeCaptcha()
     {
         HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create("http://www.ems.com.cn/ems/rand");
         hwr.Timeout = 30000;
         try
         {
             using (HttpWebResponse res = (HttpWebResponse)hwr.GetResponse())
             {
                 // The query request later needs the session values issued with the image.
                 string setCookie = res.Headers["Set-Cookie"];
                 if (setCookie == null)
                 {
                     return null;
                 }
                 string[] s = setCookie.Split(';');
                 if (s.Length < 3)
                 {
                     return null;
                 }
                 _cookieConE = s[0];
                 _cookieConE2 = s[2];

                 Bitmap bmp;
                 try
                 {
                     bmp = new Bitmap(res.GetResponseStream());
                 }
                 catch (ArgumentException)
                 {
                     // Body was not image data (for example an error page).
                     return null;
                 }

                 using (bmp)
                 using (Bitmap bit2 = Crop(bmp, 5, 0, 78, 20))
                 {
                     Base.YZ y = new Base.YZ(bit2);
                     y.GrayByPixels();               // grayscale conversion
                     y.GetPicValidByValue(128, 6);   // locate the effective area
                     Bitmap[] pics = y.GetSplitPics(6, 1);   // split into 6 glyph cells
                     try
                     {
                         // CheckNumber ignores element 0, so the glyph codes start at index 1
                         // (the original produced the same layout by splitting "%a%b...").
                         string[] arry = new string[pics.Length + 1];
                         arry[0] = "";
                         for (int i = 0; i < pics.Length; i++)
                         {
                             arry[i + 1] = y.GetSingleBmpCode(pics[i], 128);
                         }
                         return y.CheckNumber(arry);
                     }
                     finally
                     {
                         foreach (Bitmap pic in pics)
                         {
                             if (pic != null)
                             {
                                 pic.Dispose();
                             }
                         }
                     }
                 }
             }
         }
         catch (WebException)
         {
             // Timeout or HTTP error: report a failed attempt, the caller decides about retrying.
             return null;
         }
     }
    getcode:获取验证码
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Drawing;
    using System.Drawing.Imaging;
    using System.Runtime.InteropServices;
    
    namespace zsy.Base
    {
        public class YZ
        {
            // Bitmap the instance methods of this class operate on.
            public Bitmap bmpobj;

            /// <summary>
            /// Creates a recogniser that works on its own copy of <paramref name="pic"/>.
            /// </summary>
            /// <param name="pic">Source image; a new bitmap is created from it.</param>
            /// <exception cref="ArgumentNullException"><paramref name="pic"/> is null.</exception>
            public YZ(Bitmap pic)
            {
                if (pic == null)
                {
                    throw new ArgumentNullException("pic");
                }
                // NOTE(review): the original comment claimed this converts to Format32bppRgb;
                // the resulting pixel format is not established here - confirm before relying on it.
                bmpobj = new Bitmap(pic);
            }
            const string _num1 = "00000000000000000000000000000000000000000000111000000001111100000000111110000000000011000000000001100000000000110000000000011000000000001100000000000110000000000011000000000001100000000111111110000011111111000000000000000000000000000000000000000000000000000000";
            const string _num0 = "00000000000000000000000000000000000000000000111000000001111111000000110001100000110000011000011000001100001100000110000110000011000011000001100001100000110000110000011000001100011000000111111100000000111000000000000000000000000000000000000000000000000000000000";
            const string _num9 = "00000000000000000000000000000000000000000001111000000001111111000001110001100000110000011000011000001100001110001110000011111111000000111101100000000000110000000000110000001000011000000111111000000001111000000000000000000000000000000000000000000000000000000000";
            const string _num3 = "00000000000000000000000000000000000000000011111000000011111111000001000001100000000000110000000000110000000111110000000011111100000000000111000000000001100000000000110000010000111000001111111000000011111000000000000000000000000000000000000000000000000000000000";
            const string _num7 = "00000000000000000000000000000000000000000011111111000001111111100000000000110000000000010000000000011000000000011000000000001000000000001100000000000100000000000110000000000011000000000011000000000001100000000000000000000000000000000000000000000000000000000000";
            const string _num5 = "00000000000000000000000000000000000000000111111110000011111111000001100000000000110000000000011000000000001111100000000111111100000000000111000000000001100000000000110000010000111000001111111000000011111000000000000000000000000000000000000000000000000000000000";
            const string _num2 = "00000000000000000000000000000000000000000011111000000011111110000001000001100000000000110000000000011000000000011000000000011000000000011000000000011000000000011000000000011000000000001111111100000111111110000000000000000000000000000000000000000000000000000000";
            const string _num6 = "00000000000000000000000000000000000000000000111100000000111111000000110000100000011000000000011000000000001101111000000111111110000011100011100001100000110000110000011000001100011100000111111100000000111100000000000000000000000000000000000000000000000000000000";
            const string _num4 = "00000000000000000000000000000000000000000000001100000000001110000000000111000000000111100000000110110000000011011000000011001100000001100110000001111111110000111111111000000000110000000000011000000000001100000000000000000000000000000000000000000000000000000000";
            const string _num8 = "00000000000000000000000000000000000000000001111100000001111111000000110001100000011000110000001110010000000011111000000001111100000001100111000001100000110000110000011000011100011100000111111100000001111100000000000000000000000000000000000000000000000000000000";
            /// <summary>
            /// Translates glyph bit-strings into digits by exact comparison with the
            /// built-in templates for 0-9.
            /// </summary>
            /// <param name="number">
            /// Glyph codes; element 0 is ignored and matching starts at index 1.
            /// </param>
            /// <returns>
            /// The matched digits in order. Glyphs that equal no template contribute nothing,
            /// so the result can be shorter than the number of glyphs.
            /// </returns>
            public string CheckNumber(string[] number)
            {
                // Position in this table is the digit the template stands for.
                string[] templates =
                {
                    _num0, _num1, _num2, _num3, _num4,
                    _num5, _num6, _num7, _num8, _num9
                };

                string recognized = "";
                for (int slot = 1; slot < number.Length; slot++)
                {
                    string glyph = number[slot].ToString();
                    int digit = Array.IndexOf(templates, glyph);
                    if (digit >= 0)
                    {
                        recognized = recognized + (char)('0' + digit);
                    }
                }
                return recognized;
            }
    
            /// <summary>
            /// Computes a grey level from a colour's R, G and B components using
            /// integer weights (19595, 38469, 7472) and a 16-bit right shift.
            /// </summary>
            /// <param name="posClr">Colour to convert.</param>
            /// <returns>The grey level as an integer.</returns>
            private int GetGrayNumColor(System.Drawing.Color posClr)
            {
                int redPart = 19595 * posClr.R;
                int greenPart = 38469 * posClr.G;
                int bluePart = 7472 * posClr.B;

                // The three weights add up to 65536, hence the shift by 16.
                int weighted = redPart + greenPart + bluePart;
                return weighted >> 16;
            }
    
            /// <summary>
            /// Converts the working bitmap to greyscale one pixel at a time: each pixel is
            /// replaced by a colour whose R, G and B all equal its computed grey level.
            /// </summary>
            public void GrayByPixels()
            {
                int rows = bmpobj.Height;
                int cols = bmpobj.Width;
                for (int y = 0; y < rows; y++)
                {
                    for (int x = 0; x < cols; x++)
                    {
                        Color source = bmpobj.GetPixel(x, y);
                        int level = GetGrayNumColor(source);
                        Color grey = Color.FromArgb(level, level, level);
                        bmpobj.SetPixel(x, y, grey);
                    }
                }
            }
    
            /// <summary>
            /// Paints a frame of the given thickness around the working bitmap white.
            /// </summary>
            /// <param name="borderWidth">Thickness of the frame in pixels.</param>
            public void ClearPicBorder(int borderWidth)
            {
                Color white = Color.FromArgb(255, 255, 255);
                int lastInnerColumn = bmpobj.Width - 1 - borderWidth;
                int lastInnerRow = bmpobj.Height - 1 - borderWidth;

                for (int y = 0; y < bmpobj.Height; y++)
                {
                    for (int x = 0; x < bmpobj.Width; x++)
                    {
                        bool insideFrame = y >= borderWidth && x >= borderWidth
                                           && x <= lastInnerColumn && y <= lastInnerRow;
                        if (!insideFrame)
                        {
                            bmpobj.SetPixel(x, y, white);
                        }
                    }
                }
            }
    
            /// <summary>
            /// Converts the working bitmap to greyscale by processing locked pixel memory
            /// row by row instead of calling GetPixel/SetPixel for every pixel.
            /// </summary>
            /// <remarks>
            /// The previous version copied the pixels into a managed array, changed the array
            /// and never wrote it back, so the bitmap was left untouched; it also truncated
            /// every computed value to its lowest byte. The bits are now locked explicitly as
            /// 32-bit ARGB so that one int is exactly one pixel, each row is addressed through
            /// the reported stride, the converted rows are copied back, and the lock is
            /// released even when an exception occurs. Like <c>GrayByPixels</c>, the written
            /// colour is built from the grey level only, so it is fully opaque.
            /// </remarks>
            public void GrayByLine()
            {
                int width = bmpobj.Width;
                int height = bmpobj.Height;
                Rectangle rec = new Rectangle(0, 0, width, height);
                BitmapData bmpData = bmpobj.LockBits(rec, ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
                try
                {
                    int[] row = new int[width];
                    for (int y = 0; y < height; y++)
                    {
                        // Stride is the byte distance between rows and may differ from width * 4.
                        IntPtr rowStart = new IntPtr(bmpData.Scan0.ToInt64() + (long)y * bmpData.Stride);
                        Marshal.Copy(rowStart, row, 0, width);

                        for (int x = 0; x < width; x++)
                        {
                            int grayValue = GetGrayNumColor(Color.FromArgb(row[x]));
                            row[x] = Color.FromArgb(grayValue, grayValue, grayValue).ToArgb();
                        }

                        Marshal.Copy(row, 0, rowStart, width);
                    }
                }
                finally
                {
                    bmpobj.UnlockBits(bmpData);
                }
            }
    
            /// <summary>
            /// Finds the bounding box of all pixels darker than the threshold and widens it
            /// horizontally towards a width divisible by the character count.
            /// NOTE(review): the final crop is commented out, so as written this method only
            /// computes local values and leaves <c>bmpobj</c> unchanged - confirm whether
            /// that is intentional.
            /// </summary>
            /// <param name="dgGrayValue">Threshold: a pixel whose R component is below this value counts as foreground.</param>
            /// <param name="CharsCount">Number of characters the region width should be divisible by.</param>
            public void GetPicValidByValue(int dgGrayValue, int CharsCount)
            {
                int posx1 = bmpobj.Width; int posy1 = bmpobj.Height;
                int posx2 = 0; int posy2 = 0;
                for (int i = 0; i < bmpobj.Height; i++)      // scan for the effective area
                {
                    // NOTE(review): starts at column 1, unlike the other overloads which start at 0 - confirm.
                    for (int j = 1; j < bmpobj.Width; j++)
                    {
                        int pixelValue = bmpobj.GetPixel(j, i).R;
                        if (pixelValue < dgGrayValue)     // decide by grey value
                        {
                            if (posx1 > j) posx1 = j;
                            if (posy1 > i) posy1 = i;
    
                            if (posx2 < j) posx2 = j;
                            if (posy2 < i) posy2 = i;
                        };
                    };
                };
                // Make the width divisible by CharsCount.
                int Span = CharsCount - (posx2 - posx1 + 1) % CharsCount;   // columns missing for an even split
                if (Span < CharsCount)
                {
                    int leftSpan = Span / 2;    // empty columns given to the left side; when Span is odd the right side gets one more
                    // Each side is widened only if it stays inside the bitmap, so the
                    // adjusted width is not guaranteed to be divisible.
                    if (posx1 > leftSpan)
                        posx1 = posx1 - leftSpan;
                    if (posx2 + Span - leftSpan < bmpobj.Width)
                        posx2 = posx2 + Span - leftSpan;
                }
                // Copy into a new bitmap (disabled in the original source):
                //Rectangle cloneRect = new Rectangle(posx1, posy1, posx2 - posx1 + 1, posy2 - posy1 + 1);
                //bmpobj = bmpobj.Clone(cloneRect, bmpobj.PixelFormat);
            }
    
            /// <summary>
            /// Crops the working bitmap to the bounding box of all pixels whose R component
            /// is below the threshold.
            /// </summary>
            /// <remarks>
            /// When no pixel is below the threshold there is nothing to crop to and the
            /// working bitmap is left unchanged. Previously this case produced a rectangle
            /// with non-positive dimensions and the clone call failed.
            /// </remarks>
            /// <param name="dgGrayValue">Threshold separating foreground (below) from background.</param>
            public void GetPicValidByValue(int dgGrayValue)
            {
                int left = bmpobj.Width;
                int top = bmpobj.Height;
                int right = -1;     // -1 means "no foreground pixel seen yet"
                int bottom = -1;
                for (int y = 0; y < bmpobj.Height; y++)      // scan for the effective area
                {
                    for (int x = 0; x < bmpobj.Width; x++)
                    {
                        if (bmpobj.GetPixel(x, y).R < dgGrayValue)
                        {
                            if (left > x) left = x;
                            if (top > y) top = y;
                            if (right < x) right = x;
                            if (bottom < y) bottom = y;
                        }
                    }
                }

                if (right < 0)
                {
                    return;
                }

                // Replace the working bitmap with the cropped copy.
                Rectangle cloneRect = new Rectangle(left, top, right - left + 1, bottom - top + 1);
                bmpobj = bmpobj.Clone(cloneRect, bmpobj.PixelFormat);
            }
    
            /// <summary>
            /// Returns a copy of <paramref name="singlepic"/> cropped to the bounding box of
            /// all pixels whose R component is below the threshold.
            /// </summary>
            /// <remarks>
            /// When no pixel is below the threshold a copy of the whole image is returned.
            /// Previously this case produced a rectangle with non-positive dimensions and
            /// the clone call failed.
            /// </remarks>
            /// <param name="singlepic">Image to examine; it is not modified.</param>
            /// <param name="dgGrayValue">Threshold separating foreground (below) from background.</param>
            /// <returns>A new bitmap containing the cropped region.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="singlepic"/> is null.</exception>
            public Bitmap GetPicValidByValue(Bitmap singlepic, int dgGrayValue)
            {
                if (singlepic == null)
                {
                    throw new ArgumentNullException("singlepic");
                }

                int left = singlepic.Width;
                int top = singlepic.Height;
                int right = -1;     // -1 means "no foreground pixel seen yet"
                int bottom = -1;
                for (int y = 0; y < singlepic.Height; y++)      // scan for the effective area
                {
                    for (int x = 0; x < singlepic.Width; x++)
                    {
                        if (singlepic.GetPixel(x, y).R < dgGrayValue)
                        {
                            if (left > x) left = x;
                            if (top > y) top = y;
                            if (right < x) right = x;
                            if (bottom < y) bottom = y;
                        }
                    }
                }

                Rectangle cloneRect = right < 0
                    ? new Rectangle(0, 0, singlepic.Width, singlepic.Height)
                    : new Rectangle(left, top, right - left + 1, bottom - top + 1);
                return singlepic.Clone(cloneRect, singlepic.PixelFormat);
            }
    
            /// <summary>
            /// Cuts the working bitmap into a grid of equally sized tiles.
            /// </summary>
            /// <param name="RowNum">Number of tiles across (horizontal split count).</param>
            /// <param name="ColNum">Number of tiles down (vertical split count).</param>
            /// <returns>
            /// The tiles ordered left to right, then top to bottom; null when either count is 0.
            /// </returns>
            public Bitmap[] GetSplitPics(int RowNum, int ColNum)
            {
                if (RowNum == 0 || ColNum == 0)
                {
                    return null;
                }

                int tileWidth = bmpobj.Width / RowNum;
                int tileHeight = bmpobj.Height / ColNum;
                Bitmap[] tiles = new Bitmap[RowNum * ColNum];

                int next = 0;
                for (int band = 0; band < ColNum; band++)
                {
                    int top = band * tileHeight;
                    for (int cell = 0; cell < RowNum; cell++)
                    {
                        Rectangle area = new Rectangle(cell * tileWidth, top, tileWidth, tileHeight);
                        tiles[next] = bmpobj.Clone(area, bmpobj.PixelFormat);   // copy of this tile
                        next++;
                    }
                }
                return tiles;
            }
    
            /// <summary>
            /// Describes a bitmap as a string of '1' and '0' characters, one per pixel,
            /// row by row from the top-left corner.
            /// </summary>
            /// <param name="singlepic">Bitmap to describe.</param>
            /// <param name="dgGrayValue">Threshold: a pixel whose R component is below this value yields '1', otherwise '0'.</param>
            /// <returns>A string with Width * Height characters.</returns>
            public string GetSingleBmpCode(Bitmap singlepic, int dgGrayValue)
            {
                int rows = singlepic.Height;
                int cols = singlepic.Width;
                char[] cells = new char[rows * cols];

                int at = 0;
                for (int y = 0; y < rows; y++)
                {
                    for (int x = 0; x < cols; x++)
                    {
                        bool isForeground = singlepic.GetPixel(x, y).R < dgGrayValue;
                        cells[at++] = isForeground ? '1' : '0';
                    }
                }
                return new string(cells);
            }
        }
    }
    获取图片里的验证码
     // Posts the waybill number and the recognised captcha as a URL-encoded form
     // and reads the whole response page into responseData.
     HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(url);
     Encoding encoding = Encoding.UTF8;

     // Form values must be percent-encoded; otherwise characters such as '&', '=' or '+'
     // in a value would corrupt the request body. ("" + x keeps the original
     // concatenation behaviour for null or non-string values.)
     string param = "muMailNum=" + Uri.EscapeDataString("" + mailCode)
                  + "&checkCode=" + Uri.EscapeDataString("" + code);
     byte[] bs = encoding.GetBytes(param);

     req.Method = "POST";
     req.Timeout = 30000;
     req.ContentType = "application/x-www-form-urlencoded";
     req.ContentLength = bs.Length;
     req.CookieContainer = _cookieCon;
     using (Stream reqStream = req.GetRequestStream())
     {
         // The using block closes the stream; no explicit Close() is needed.
         reqStream.Write(bs, 0, bs.Length);
     }
     using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
     {
         using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
         {
             responseData = reader.ReadToEnd();
         }
     }
    获取整个网页内容

    然后利用正则过滤出想要获得的信息

  • 相关阅读:
    浏览器缓存
    FLINK --- 写HDFS
    ArrayList 线程不安全
    rocketMQ源码之 似乎并不能严格进行顺序消费
    报错 Aray size is not a small enough positive integer 的解决方案
    什么是函数的柯里化?
    props, state与render函数关系 – 数据和页面是如何实现互相联动的?
    【自我管理】my schedule
    【解题报告】洛谷P1219 八皇后
    【解题报告】洛谷P1433 吃奶酪
  • 原文地址:https://www.cnblogs.com/xiaoshitou/p/4651247.html
Copyright © 2011-2022 走看看