zoukankan      html  css  js  c++  java
  • C# 保持登陆状态并抓取数据(转)

    // Logs in to the site by POSTing the login form, keeps the session cookies in a
    // shared CookieContainer, then downloads a page that requires the login and
    // extracts the text of its first <table> (with the <thead> section removed).
    string url1 = @"http://login.vancl.com/Login/";                  // URL the login form is submitted to
    string url2 = @"http://my.vancl.com/User/User_Order_List.aspx";  // page to download once logged in

    // Form body sent to the login URL.
    // NOTE(review): credentials are hard-coded here; load them from configuration instead.
    // NOTE(review): the leading "mailto:" looks like an artifact of the e-mail address being
    // auto-linked when this snippet was published -- confirm the real name of the first form field.
    string indata = "mailto:Logintrue=true&UserName=jsycywm@hotmail.com&PassWord=我的密码&x=45&y=15";

    // Encode the body up front so that Content-Length is the number of BYTES sent.
    // indata.Length counts UTF-16 characters, which is smaller than the GB2312 byte
    // count whenever the body contains non-ASCII characters.
    Encoding requestEncoding = Encoding.GetEncoding("GB2312");
    byte[] requestBody = requestEncoding.GetBytes(indata);

    // One container shared by both requests; it is what carries the login session.
    CookieContainer myCookieContainer = new CookieContainer();

    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url1);
    myHttpWebRequest.Method = "POST";
    myHttpWebRequest.ContentType = "application/x-www-form-urlencoded";
    myHttpWebRequest.ContentLength = requestBody.Length;
    myHttpWebRequest.CookieContainer = myCookieContainer;

    using (Stream myRequestStream = myHttpWebRequest.GetRequestStream())
    {
        myRequestStream.Write(requestBody, 0, requestBody.Length);
    }

    // Read the login response to the end. Cookies set by the server are stored in
    // myCookieContainer by the request itself, so nothing has to be copied by hand.
    string outdata;
    using (HttpWebResponse loginResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
    using (Stream loginStream = loginResponse.GetResponseStream())
    using (StreamReader loginReader = new StreamReader(loginStream, Encoding.UTF8))
    {
        outdata = loginReader.ReadToEnd();
    }

    // Second request: attaching the same CookieContainer sends the login cookies along.
    // According to the original author, the page reports "not logged in" when the
    // CookieContainer assignment below is left out.
    myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url2);
    myHttpWebRequest.CookieContainer = myCookieContainer;

    using (HttpWebResponse pageResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
    using (Stream pageStream = pageResponse.GetResponseStream())
    using (StreamReader pageReader = new StreamReader(pageStream, Encoding.UTF8))
    {
        outdata = pageReader.ReadToEnd();
    }

    // The page is trimmed by fixed character counts before matching.
    // NOTE(review): 413 and 11 are tied to the exact page layout at the time of writing;
    // presumably they strip a fixed header and footer -- confirm against the live page.
    const int leadingCharsToSkip = 413;
    const int trailingCharsToSkip = 11;
    if (outdata.Length < leadingCharsToSkip + trailingCharsToSkip)
    {
        throw new InvalidOperationException(
            "Downloaded page is shorter than expected (" + outdata.Length + " characters); the login may have failed.");
    }
    string abc = outdata.Substring(leadingCharsToSkip, outdata.Length - leadingCharsToSkip - trailingCharsToSkip);

    // First <table> ... /table> span. No RegexOptions are given, so '.' does not match
    // a newline and the whole table has to sit on a single line to be found.
    Match mc = Regex.Match(abc, "<table>.*?/table>");
    if (!mc.Success)
    {
        throw new InvalidOperationException("No <table> element was found in the downloaded page.");
    }

    // Drop the header section so that only the remaining table content is left.
    abc = Regex.Replace(mc.Value, "<thead>.*?thead>", "");

    // LoadXml throws XmlException when the fragment is not well-formed XML,
    // which real-world HTML often is not.
    XmlDocument xmlDoc = new XmlDocument();
    xmlDoc.LoadXml(abc);
    XmlNode xn = xmlDoc.SelectSingleNode("table");
    XmlElement xe = (XmlElement)xn;
    if (xe == null)
    {
        throw new InvalidOperationException("The extracted fragment has no <table> root element.");
    }

    // Concatenated text of every cell in the table.
    string result = xe.InnerText;

  • 相关阅读:
    chrome浏览器中安装以及使用Elasticsearch head 插件
    windows10 升级并安装配置 jmeter5.3
    linux下部署Elasticsearch6.8.1版本的集群
    【Rollo的Python之路】Python 爬虫系统学习 (八) logging模块的使用
    【Rollo的Python之路】Python 爬虫系统学习 (七) Scrapy初识
    【Rollo的Python之路】Python 爬虫系统学习 (六) Selenium 模拟登录
    【Rollo的Python之路】Python 爬虫系统学习 (五) Selenium
    【Rollo的Python之路】Python 爬虫系统学习 (四) XPath学习
    【Rollo的Python之路】Python 爬虫系统学习 (三)
    【Rollo的Python之路】Python sys argv[] 函数用法笔记
  • 原文地址:https://www.cnblogs.com/love2wllw/p/1709963.html
Copyright © 2011-2022 走看看