zoukankan      html  css  js  c++  java
  • .net抓取网页数据

    1、想通过代码获取某个页面的数据,首先右键查看页面源代码并加以分析;然后修改下面的代码,一步步定位所需内容,最后存入数据库。

     //根据Url地址得到网页的html源码 
            /// <summary>
            /// Downloads the page at <paramref name="Url"/> and returns its body decoded as UTF-8 text.
            /// </summary>
            /// <param name="Url">Address of the page to fetch; the request is cast to <see cref="HttpWebRequest"/>, so it must be an HTTP(S) URL.</param>
            /// <returns>
            /// The HTML source of the page, or an empty string when anything goes wrong
            /// (in which case an error dialog is shown instead of throwing).
            /// </returns>
            private string GetWebContent(string Url)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Request timeout: 30000 ms.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    // Dispose the response, its stream, and the reader deterministically;
                    // otherwise the underlying connection is held until finalization.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception)
                {
                    // Best-effort fetch: report the failure to the user and fall through
                    // to return the empty string.
                    MessageBox.Show("出错");
                }
                return strResult;
            }
    
            /// <summary>
            /// Click handler: downloads the data page, shows its HTML in <c>richTextBox1</c>,
            /// cuts out the first <c>&lt;ul&gt;…&lt;/ul&gt;</c> list that follows the "历史数据" marker,
            /// and inserts one record into the "Silver" table for each data row of that list.
            /// </summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event arguments (unused).</param>
            private void button1_Click(object sender, EventArgs e)
            {
                // Address of the page to scrape.
                string Url = "http://kxt.com/data/20.html";
                string strWebContent = GetWebContent(Url);
                richTextBox1.Text = strWebContent;

                // GetWebContent returns "" after reporting a failure itself; nothing to parse.
                if (string.IsNullOrEmpty(strWebContent))
                {
                    return;
                }

                // Locate the list that holds the data. Every IndexOf can return -1, and feeding
                // -1 into the next IndexOf/Substring would throw, so propagate "not found".
                int iBodyStart = strWebContent.IndexOf("<body", StringComparison.Ordinal);
                int iStart = iBodyStart < 0
                    ? -1
                    : strWebContent.IndexOf("历史数据", iBodyStart, StringComparison.Ordinal);
                int iTableStart = iStart < 0
                    ? -1
                    : strWebContent.IndexOf("<ul", iStart, StringComparison.Ordinal);
                int iTableEnd = iTableStart < 0
                    ? -1
                    : strWebContent.IndexOf("</ul>", iTableStart, StringComparison.Ordinal);
                if (iTableEnd < 0)
                {
                    MessageBox.Show("出错");
                    return;
                }

                // +5 keeps the closing "</ul>" tag in the fragment.
                string strWeb = strWebContent.Substring(iTableStart, iTableEnd - iTableStart + 5);

                // Parse the fragment through an off-screen WebBrowser; dispose it when done.
                using (WebBrowser webb = new WebBrowser())
                {
                    webb.Navigate("about:blank");
                    if (webb.Document == null)
                    {
                        MessageBox.Show("出错");
                        return;
                    }

                    HtmlDocument htmldoc = webb.Document.OpenNew(true);
                    htmldoc.Write(strWeb);
                    HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("li");

                    // Row filter kept from the original: the first <li> is skipped and the loop
                    // stops when the 1-based row number reaches Count - 2.
                    // NOTE(review): presumably header and trailer rows of the list - confirm against the page.
                    int stopAt = htmlTR.Count - 2;
                    int i = 0;
                    foreach (HtmlElement tr in htmlTR)
                    {
                        i++;
                        if (i == 1)
                        {
                            continue;
                        }
                        if (i == stopAt)
                        {
                            break;
                        }

                        HtmlElementCollection spans = tr.GetElementsByTagName("span");
                        // A data row needs five <span> cells; skip anything else instead of
                        // failing on an out-of-range index.
                        if (spans.Count < 5)
                        {
                            continue;
                        }

                        string dateTime = spans[0].InnerText;
                        string netWeightOunce = spans[1].InnerText;
                        string netWeightTon = spans[2].InnerText;
                        string totalValue = spans[3].InnerText;
                        string regulation = spans[4].InnerText;
                        //string affectOil = spans[5].InnerText;

                        // Columns: Id, UpdateTime, NetWeightOunce, NetWeightTon, TotalValue, Regulation, FinanceTime
                        SqlServer ado = new SqlServer();
                        ado.AddField("UpdateTime", DateTime.Now);
                        ado.AddField("NetWeightOunce", netWeightOunce);
                        ado.AddField("NetWeightTon", netWeightTon);
                        ado.AddField("TotalValue", totalValue);
                        // ado.AddField("EffectOil", affectOil);
                        ado.AddField("Regulation", regulation);
                        // Convert.ToDateTime parses with the current culture and throws
                        // FormatException on text it cannot read.
                        ado.AddField("FinanceTime", Convert.ToDateTime(dateTime).ToString("yyyy-MM-dd"));

                        ado.Insert("Silver");
                    }
                }

                MessageBox.Show("OK");
            }
  • 相关阅读:
    Android 百度地图开发(一)--- 申请API Key和在项目中显示百度地图
    Session,Cookie,jsessionid,Url重写
    PHP输出当前进程所有变量 / 常量 / 模块 / 函数 / 类
    table自适应宽度
    python学习笔记1(字符串操作)
    jquery-select选中
    dos命令大全
    dos命令之创建文件,文件夹
    PHP中逻辑运算符的高效用法---&&和||
    apache vhost
  • 原文地址:https://www.cnblogs.com/zhaoyihao/p/4699212.html
Copyright © 2011-2022 走看看