zoukankan      html  css  js  c++  java
  • 花嫁喜铺数据抓取

     private void saveImage()
      {
       string baseUri = "
    http://www.hjxsy.com/hjxp/showimg.asp?id=";
       
       for(int _number = 1; _number < 300; _number ++)
       {
        try
        {
         WebClient client = new WebClient();
         client.DownloadFile(baseUri+_number.ToString(),@"C:\Inetpub\wwwroot\TestWebServices\hj\"+_number.ToString()+".jpg");
        }
        catch
        {}
       }
      }

      private void saveInfo()
      {
       SqlConnection con = new SqlConnection(System.Configuration.ConfigurationSettings.AppSettings["SqlConnectStringPortal"]);
       SqlCommand cmd = new SqlCommand("insert into marriage values(@id,@name,@priceold,@pricenew,@unit,@intro)",con);
       cmd.Parameters.Add("@id",SqlDbType.Int);
       cmd.Parameters.Add("@name",SqlDbType.NVarChar);
       cmd.Parameters.Add("@priceold",SqlDbType.Money);
       cmd.Parameters.Add("@pricenew",SqlDbType.Money);
       cmd.Parameters.Add("@unit",SqlDbType.NVarChar);
       cmd.Parameters.Add("@intro",SqlDbType.NVarChar);
       con.Open();
       for(int i=1;i<300;i++)
       {
        try
        {
         WebClient client = new WebClient();
         Stream stm = client.OpenRead(txtUri.Text+i.ToString());
         StreamReader sdr = new StreamReader(stm,Encoding.Default);
         string line = sdr.ReadToEnd();

         int pm1 = line.IndexOf(@"品名:")+3;
         string pm2 = line.Substring(pm1);
         int pm3 = pm2.IndexOf(@"</td>");
         string _pm = pm2.Substring(0,pm3).Trim();

         int firstYuan = pm2.IndexOf(@"¥")+1;
         string priceOne = pm2.Substring(firstYuan);
         int _firstYuan = priceOne.IndexOf(@" ");
         string _priceOne = priceOne.Substring(0,_firstYuan).Trim();

         string unit = priceOne.Substring(_firstYuan) + 1;
         int _unit = unit.IndexOf(@"<br>");
         string _unitOne = unit.Substring(0,_unit).Trim();

         int secondYuan = unit.IndexOf(@"¥")+1;
         string priceTwo = unit.Substring(secondYuan);
         int _secondYuan = priceTwo.IndexOf(@" ");
         string _priceTwo = priceTwo.Substring(0,_secondYuan).Trim();

         int intro1 = priceTwo.IndexOf(@"85%")+9;
         string intro2 = priceTwo.Substring(intro1);
         int intro3 = intro2.IndexOf(@"</td>");
         string _intro = intro2.Substring(0,intro3).Replace(@"&nbsp;"," ").Trim();

         cmd.Parameters["@id"].Value = i;
         cmd.Parameters["@name"].Value = _pm;
         cmd.Parameters["@priceold"].Value = Convert.ToDecimal(_priceOne);
         cmd.Parameters["@pricenew"].Value = Convert.ToDecimal(_priceTwo);
         cmd.Parameters["@unit"].Value = _unitOne;
         cmd.Parameters["@intro"].Value = _intro;
         cmd.ExecuteNonQuery();
         stm.Close();
        }
        catch
        {}
       }
       con.Close();
      }


    上海热线 -- wojiehun.com

      private void getInfo2()
      {
       StreamReader sr = new StreamReader(@"C:\Inetpub\wwwroot\TestWebServices\meirong.txt",Encoding.Default);
       string line = sr.ReadToEnd();
       int flag1;
       int flag2;
       int flag3;
       int flag4;
       int flag5;
       int flag6;
       string str1;
       string str2;
       string str3;
       string str4;
       string str5;
       string str6;
       
       do
       {
        flag1 = line.IndexOf(@"http");
        str1 = line.Substring(flag1);
        flag2 = str1.IndexOf(@"htm")+3;
        str2 = str1.Substring(0,flag2);

        flag3 = str1.IndexOf(@"htm>");
        str3 = str1.Substring(flag3);
        flag4 = str3.IndexOf(@"</a>");
        str4 = str3.Substring(4,flag4);

        WebClient client = new WebClient();
        Stream stm = client.OpenRead(str2);
        StreamReader sdr = new StreamReader(stm,Encoding.Default);
        string _line = sdr.ReadToEnd();
        
        try
        {
         flag5 = _line.IndexOf(@"<!--enpcontent-->") +17;
         str5 = _line.Substring(flag5);
         flag6 = str5.IndexOf(@"<script") -1;
         str6 = str5.Substring(0,flag6);
         str6 = str6.Remove(str6.IndexOf(@"<TABLE"),102);

         string title = System.Web.HttpUtility.UrlEncode(str4,Encoding.GetEncoding("GB2312"));
         string contenttype = System.Web.HttpUtility.UrlEncode("html", Encoding.GetEncoding("GB2312"));
         string typeid = System.Web.HttpUtility.UrlEncode("5", Encoding.GetEncoding("GB2312"));
         string nfrom = System.Web.HttpUtility.UrlEncode("", Encoding.GetEncoding("GB2312"));
         string zznews = System.Web.HttpUtility.UrlEncode("", Encoding.GetEncoding("GB2312"));
         string about = System.Web.HttpUtility.UrlEncode("", Encoding.GetEncoding("GB2312"));
         string tjnews = System.Web.HttpUtility.UrlEncode("", Encoding.GetEncoding("GB2312"));
         string shenghe = System.Web.HttpUtility.UrlEncode("checkbox", Encoding.GetEncoding("GB2312"));
         string content = System.Web.HttpUtility.UrlEncode(str6, Encoding.GetEncoding("GB2312"));

         String postStr = @"?";
         postStr = postStr + "&title=" + title;
         postStr = postStr + "&contenttype=" + contenttype;
         postStr = postStr + "&typeid=" + typeid;
         postStr = postStr + "&nfrom=" + nfrom;
         postStr = postStr + "&zznews=" + zznews;
         postStr = postStr + "&about=" + about;
         postStr = postStr + "&tjnews=" + tjnews;
         postStr = postStr + "&shenghe=" + shenghe;
         postStr = postStr + "&content=" + content;

         WebClient command = new WebClient();
         Stream st = command.OpenRead(postStr);
         st.Close();
        }
        catch
        {}
        stm.Close();
        line = str3;
       }
       while(line.Length >93);
      }

    作者:thanks       微信:-       QQ:305380844
             
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。
  • 相关阅读:
    MySQL锁(阻塞)
    MySQL锁类型(一致性是非锁定读、自增和外键)
    MySQL锁算法(行锁的三种算法以及解决幻读问题)
    MySQL锁概述
    MySQL锁问题(脏读、不可重复读、幻读)
    MySQL默认隔离级别对应解决的三种问题
    简单动态字符串
    限流
    # SpringBoot自定义线程池
    & 生产环境mysql问题记录
  • 原文地址:https://www.cnblogs.com/thanks/p/19746.html
Copyright © 2011-2022 走看看