zoukankan      html  css  js  c++  java
  • Crawler 爬虫

    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using Crawler.Model;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Persists crawled <see cref="Category"/> records: rows are inserted through
        /// <c>SqlHelper</c>, and a JSON snapshot of each saved list is written to a text file
        /// under <c>ObjectFactory.DataPath</c>.
        /// </summary>
        public class CategoryRepository //: IRepository<Commodity>
        {
            private Logger logger = new Logger(typeof(CategoryRepository));

            /// <summary>
            /// Inserts the categories into the "Category" table, then writes the JSON text
            /// record in the background.
            /// </summary>
            /// <param name="categoryList">Categories to persist.</param>
            public void Save(List<Category> categoryList)
            {
                SqlHelper.InsertList<Category>(categoryList, "Category");
                // Fire-and-forget: SaveList catches and logs every exception itself, so the task
                // cannot fault unobserved. Task.Run is used instead of delegate BeginInvoke,
                // which is not supported on .NET Core / .NET 5+.
                Task.Run(() => SaveList(categoryList));
            }

            /// <summary>
            /// Returns the categories stored with the given level.
            /// </summary>
            /// <param name="level">Value matched against the <c>categorylevel</c> column.</param>
            /// <returns>The matching rows mapped to <see cref="Category"/> objects.</returns>
            public List<Category> QueryListByLevel(int level)
            {
                // level is an int, so formatting it into the statement cannot inject SQL text.
                string sql = string.Format("SELECT * FROM category WHERE categorylevel={0};", level);
                return SqlHelper.QueryList<Category>(sql);
            }


            /// <summary>
            /// Writes the list as one JSON line to a timestamped text file under the data path.
            /// Failures are logged and never propagated to the caller.
            /// </summary>
            /// <param name="categoryList">Categories to serialize.</param>
            public void SaveList(List<Category> categoryList)
            {
                try
                {
                    string recordFileName = string.Format("{0}_Category.txt", DateTime.Now.ToString("yyyyMMddHHmmss"));
                    string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);

                    // CreateDirectory is a no-op when the directory already exists, and
                    // AppendText creates the file when it is missing, so no branching is needed.
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    using (StreamWriter sw = File.AppendText(totolPath))
                    {
                        sw.WriteLine(JsonConvert.SerializeObject(categoryList));
                    }
                }
                catch (Exception e)
                {
                    logger.Error("CategoryRepository.SaveList出现异常", e);
                }
            }
        }
    }
    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using Crawler.Model;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Persists crawled <see cref="Commodity"/> records: rows are spread over the
        /// <c>JD_Commodity_001</c> … <c>JD_Commodity_030</c> tables, and each crawled page can
        /// also be written as a JSON text record under <c>ObjectFactory.DataPath</c>.
        /// </summary>
        public class CommodityRepository //: IRepository<Commodity>
        {
            // Number of JD_Commodity_xxx shard tables (DBInit creates tables 001..030).
            private const int TableCount = 30;

            private Logger logger = new Logger(typeof(CommodityRepository));

            /// <summary>
            /// Inserts the commodities into the database, grouped by their target shard table.
            /// A null or empty list is ignored.
            /// </summary>
            /// <param name="commodityList">Commodities to insert.</param>
            public void SaveList(List<Commodity> commodityList)
            {
                if (commodityList == null || commodityList.Count == 0) return;
                IEnumerable<IGrouping<string, Commodity>> group = commodityList.GroupBy<Commodity, string>(c => GetTableName(c));

                foreach (var data in group)
                {
                    SqlHelper.InsertList<Commodity>(data.ToList(), data.Key);
                }
            }

            /// <summary>
            /// Maps a commodity to its shard table name from its <c>ProductId</c>.
            /// </summary>
            private string GetTableName(Commodity commodity)
            {
                // C# '%' keeps the sign of the dividend; normalise so a negative ProductId
                // still lands in 1..TableCount instead of producing a nonexistent table name.
                long shard = ((commodity.ProductId % TableCount) + TableCount) % TableCount + 1;
                return string.Format("JD_Commodity_{0}", shard.ToString("000"));
            }

            /// <summary>
            /// Writes the commodities of one crawled page as a JSON line to
            /// <c>{level}/{parentCode}/{categoryId}/{page}.txt</c> under the data path.
            /// Failures are logged and never propagated to the caller.
            /// </summary>
            /// <param name="commodityList">Commodities found on the page.</param>
            /// <param name="category">Category the page belongs to; determines the folder.</param>
            /// <param name="page">Page number; used as the file name.</param>
            public void SaveList(List<Commodity> commodityList, Category category, int page)
            {
                try
                {
                    string recordFileName = string.Format("{0}/{1}/{2}/{3}.txt", category.CategoryLevel, category.ParentCode, category.Id, page);
                    string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);

                    // CreateDirectory is a no-op when the directory already exists, and
                    // AppendText creates the file when it is missing.
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    using (StreamWriter sw = File.AppendText(totolPath))
                    {
                        sw.WriteLine(JsonConvert.SerializeObject(commodityList));
                    }
                }
                catch (Exception e)
                {
                    logger.Error("CommodityRepository.SaveList出现异常", e);
                }
            }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    using Crawler;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Drops and recreates the crawler's SQL Server tables.
        /// Use with care: all existing data in the affected tables is lost.
        /// </summary>
        public class DBInit
        {
            private static Logger logger = new Logger(typeof(DBInit));

            /// <summary>
            /// Use with care: drops all 30 <c>JD_Commodity_xxx</c> tables and recreates them empty.
            /// </summary>
            public void InitCommodityTable()
            {
                #region Delete
                try
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 1; i < 31; i++)
                    {
                        // Guard each drop with OBJECT_ID so a missing table is not an error;
                        // this does not depend on the language of the server's error messages.
                        sb.AppendFormat("IF OBJECT_ID(N'[dbo].[JD_Commodity_{0}]', N'U') IS NOT NULL DROP TABLE [dbo].[JD_Commodity_{0}];", i.ToString("000"));
                    }
                    SqlHelper.ExecuteNonQuery(sb.ToString());
                }
                catch (Exception ex)
                {
                    // Fallback kept from the original: a (Chinese-localized) "does not exist or
                    // no permission" error is treated as "nothing to drop".
                    if (ex.Message.Contains("因为它不存在,或者您没有所需的权限。"))
                    {
                        logger.Warn("初始化数据库InitCommodityTable删除的时候,原表不存在");
                    }
                    else
                    {
                        logger.Error("初始化数据库InitCommodityTable失败", ex);
                        throw; // rethrow without resetting the stack trace
                    }
                }
                #endregion Delete

                #region Create
                try
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 1; i < 31; i++)
                    {
                        sb.AppendFormat(@"CREATE TABLE [dbo].[JD_Commodity_{0}](
                                            [Id] [int] IDENTITY(1,1) NOT NULL,
                                            [ProductId] [bigint] NULL,
                                            [CategoryId] [int] NULL,
                                            [Title] [nvarchar](500) NULL,
                                            [Price] [decimal](18, 2) NULL,
                                            [Url] [varchar](1000) NULL,
                                            [ImageUrl] [varchar](1000) NULL,
                                 CONSTRAINT [PK_JD_Commodity_{0}] PRIMARY KEY CLUSTERED 
                                (
                                    [Id] ASC
                                )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
                                ) ON [PRIMARY];", i.ToString("000"));
                    }
                    SqlHelper.ExecuteNonQuery(sb.ToString());
                }
                catch (Exception ex)
                {
                    logger.Error("InitCommodityTable创建异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
                #endregion Create
            }

            /// <summary>
            /// Use with care: drops the <c>Category</c> table and recreates it empty.
            /// </summary>
            public void InitCategoryTable()
            {
                #region Delete
                try
                {
                    // Guarded drop: a missing table is not an error, whatever the server language.
                    SqlHelper.ExecuteNonQuery("IF OBJECT_ID(N'[dbo].[Category]', N'U') IS NOT NULL DROP TABLE [dbo].[Category];");
                }
                catch (Exception ex)
                {
                    // Fallback kept from the original (Chinese-localized server message).
                    if (ex.Message.Equals("无法对 表 'dbo.Category' 执行 删除,因为它不存在,或者您没有所需的权限。"))
                    {
                        logger.Warn("初始化数据库InitCategoryTable删除的时候,原表不存在");
                    }
                    else
                    {
                        logger.Error("初始化数据库InitCategoryTable失败", ex);
                        throw; // rethrow without resetting the stack trace
                    }
                }
                #endregion Delete

                #region Create
                try
                {
                    StringBuilder sb = new StringBuilder();
                    // Plain Append: the statement has no format placeholders.
                    sb.Append(@"CREATE TABLE [dbo].[Category](
                                            [Id] [int] IDENTITY(1,1) NOT NULL,
                                            [Code] [varchar](100) NULL,
                                            [ParentCode] [varchar](100) NULL,
                                            [CategoryLevel] [int] NULL,
                                            [Name] [nvarchar](50) NULL,
                                            [Url] [varchar](1000) NULL,
                                            [State] [int] NULL,
                                          CONSTRAINT [PK_Category] PRIMARY KEY CLUSTERED 
                                         (
                                             [Id] ASC
                                         )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
                                         ) ON [PRIMARY];");

                    SqlHelper.ExecuteNonQuery(sb.ToString());
                }
                catch (Exception ex)
                {
                    logger.Error("初始化数据库InitCategoryTable 创建失败", ex);
                    throw; // rethrow without resetting the stack trace
                }
                #endregion Create

            }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.DataService
    {
            /// <summary>
            /// Contract for a repository that can save a single entity or a list of entities.
            /// </summary>
            /// <typeparam name="T">Entity type; must be a reference type.</typeparam>
            public interface IRepository<T> where T : class//, new()
            {
                /// <summary>
                /// Saves a single entity.
                /// </summary>
                /// <param name="entity">The entity to save.</param>
                void Save(T entity);
                /// <summary>
                /// Saves a list of entities.
                /// </summary>
                /// <param name="entity">The entities to save.</param>
                void SaveList(List<T> entity);
            }
    }
    using System;
    using System.Collections.Generic;
    using System.Configuration;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Holds application-wide settings read from the configuration file.
        /// </summary>
        public class ObjectFactory
        {
             /// <summary>
             /// Value of the "DataPath" appSettings entry; the repositories combine it with
             /// relative file names when writing text records. Null when the entry is absent.
             /// </summary>
             public static string DataPath = ConfigurationManager.AppSettings["DataPath"];
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Configuration;
    using System.Data;
    using System.Data.SqlClient;
    using System.Linq;
    using System.Reflection;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Small ADO.NET helper for the "mvc5" SQL Server connection: executes statements,
        /// maps query results to objects by property name, and inserts objects by reflecting
        /// over their public properties.
        /// </summary>
        public class SqlHelper
        {
            private static Logger logger = new Logger(typeof(SqlHelper));
            private static string ConnStr = ConfigurationManager.ConnectionStrings["mvc5"].ConnectionString;

            // SQL Server rejects a command carrying more than 2100 parameters; insert batches
            // are cut below that limit.
            private const int MaxParametersPerCommand = 2000;

            /// <summary>
            /// Executes a statement (or batch) that returns no rows, without an explicit transaction.
            /// </summary>
            /// <param name="sql">SQL text to execute.</param>
            public static void ExecuteNonQuery(string sql)
            {
                using (SqlConnection sqlConn = new SqlConnection(ConnStr))
                using (SqlCommand cmd = new SqlCommand(sql, sqlConn))
                {
                    sqlConn.Open();
                    cmd.ExecuteNonQuery();//.ExecuteNonQueryAsync();//
                }
            }

            /// <summary>
            /// Executes a statement (or batch) inside a transaction; commits on success, rolls
            /// back and rethrows on failure.
            /// </summary>
            /// <param name="sql">SQL text to execute.</param>
            public static void ExecuteNonQueryWithTrans(string sql)
            {
                using (SqlConnection sqlConn = new SqlConnection(ConnStr))
                {
                    sqlConn.Open();
                    using (SqlTransaction trans = sqlConn.BeginTransaction())
                    using (SqlCommand cmd = new SqlCommand(sql, sqlConn, trans))
                    {
                        try
                        {
                            cmd.ExecuteNonQuery();//.ExecuteNonQueryAsync();//
                            trans.Commit();
                        }
                        catch (Exception)
                        {
                            // Roll back while the connection is still open. (Previously the
                            // catch ran after the connection had been disposed, so the explicit
                            // rollback could never execute.)
                            try
                            {
                                if (trans.Connection != null)
                                    trans.Rollback();
                            }
                            catch (Exception rollbackEx)
                            {
                                // Do not let a failed rollback hide the original error.
                                logger.Error("ExecuteNonQueryWithTrans回滚失败", rollbackEx);
                            }
                            throw; // preserve the original stack trace
                        }
                    }
                }
            }

            /// <summary>
            /// Runs a query and maps every row to a new <typeparamref name="T"/>.
            /// </summary>
            /// <param name="sql">SELECT statement; must return a column for every public property of T.</param>
            /// <returns>One object per row; empty when the query returns no rows.</returns>
            public static List<T> QueryList<T>(string sql) where T : new()
            {
                using (SqlConnection sqlConn = new SqlConnection(ConnStr))
                using (SqlCommand cmd = new SqlCommand(sql, sqlConn))
                {
                    sqlConn.Open();
                    using (SqlDataReader reader = cmd.ExecuteReader())
                    {
                        return TransList<T>(reader);
                    }
                }
            }

            /// <summary>
            /// Inserts one object into <paramref name="tableName"/>.
            /// </summary>
            /// <param name="model">Object whose public properties (except "Id") become column values.</param>
            /// <param name="tableName">Target table; written into the statement as-is, so it must come from trusted code.</param>
            public static void Insert<T>(T model, string tableName) where T : new()
            {
                InsertList<T>(new List<T> { model }, tableName);
            }

            /// <summary>
            /// Inserts every object of the list into <paramref name="tableName"/> using
            /// parameterized INSERT statements, sent in batches that respect SQL Server's
            /// parameter limit. An empty list is a no-op.
            /// </summary>
            /// <param name="list">Objects to insert.</param>
            /// <param name="tableName">Target table; written into the statement as-is, so it must come from trusted code.</param>
            /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
            public static void InsertList<T>(List<T> list, string tableName) where T : new()
            {
                if (list == null) throw new ArgumentNullException("list");
                // Nothing to insert; executing an empty command text would make ADO.NET throw.
                if (list.Count == 0) return;

                using (SqlConnection sqlConn = new SqlConnection(ConnStr))
                {
                    sqlConn.Open();
                    int next = 0;
                    while (next < list.Count)
                    {
                        using (SqlCommand cmd = new SqlCommand())
                        {
                            cmd.Connection = sqlConn;
                            StringBuilder sbSql = new StringBuilder();
                            while (next < list.Count)
                            {
                                List<PropertyInfo> columns = GetInsertProperties(list[next]);
                                // Close the batch before it would exceed the parameter limit,
                                // but always accept at least one row per command.
                                if (cmd.Parameters.Count > 0
                                    && cmd.Parameters.Count + columns.Count > MaxParametersPerCommand)
                                {
                                    break;
                                }
                                AppendInsert(cmd, sbSql, list[next], columns, tableName);
                                next++;
                            }
                            cmd.CommandText = sbSql.ToString();
                            cmd.ExecuteNonQuery();
                        }
                    }
                }
            }

            #region Private
            /// <summary>
            /// Returns the public properties written by an insert: everything except a
            /// property named "Id" (case-insensitive), which is the identity column.
            /// </summary>
            private static List<PropertyInfo> GetInsertProperties<T>(T model)
            {
                return model.GetType().GetProperties()
                    .Where(p => !p.Name.Equals("id", StringComparison.OrdinalIgnoreCase))
                    .ToList();
            }

            /// <summary>
            /// Appends one parameterized INSERT for <paramref name="model"/> to
            /// <paramref name="sbSql"/> and registers its values on <paramref name="cmd"/>.
            /// </summary>
            private static void AppendInsert<T>(SqlCommand cmd, StringBuilder sbSql, T model, List<PropertyInfo> columns, string tableName)
            {
                StringBuilder sbFields = new StringBuilder();
                StringBuilder sbValues = new StringBuilder();

                foreach (PropertyInfo p in columns)
                {
                    // Parameter names are numbered across the whole command so they stay unique.
                    string paramName = "@p" + cmd.Parameters.Count;
                    object oValue = p.GetValue(model);
                    // Values travel as typed parameters: no quoting issues, no culture-dependent
                    // number formatting, and apostrophes in the data are preserved.
                    // A null is stored as an empty string, as the previous string-built SQL did.
                    cmd.Parameters.AddWithValue(paramName, oValue ?? (object)string.Empty);

                    sbFields.AppendFormat("[{0}],", p.Name);
                    sbValues.Append(paramName).Append(',');
                }
                sbSql.AppendFormat("INSERT INTO {0} ({1}) VALUES ({2});", tableName, sbFields.ToString().TrimEnd(','), sbValues.ToString().TrimEnd(','));
            }

            /// <summary>
            /// Copies the current row of <paramref name="reader"/> into <paramref name="target"/>,
            /// matching columns to public properties by name.
            /// </summary>
            private static void FillFromRow<T>(T target, PropertyInfo[] properties, SqlDataReader reader)
            {
                foreach (PropertyInfo p in properties)
                {
                    object raw = reader[p.Name];
                    // The tables declare most columns as NULL-able; a NULL leaves the property
                    // at its default instead of failing inside Convert.ChangeType.
                    if (raw == null || raw is DBNull) continue;

                    // Convert.ChangeType cannot target Nullable<T>; convert to the underlying type.
                    Type targetType = Nullable.GetUnderlyingType(p.PropertyType) ?? p.PropertyType;
                    p.SetValue(target, Convert.ChangeType(raw, targetType));
                }
            }

            /// <summary>
            /// Maps every remaining row of the reader to a new <typeparamref name="T"/>.
            /// </summary>
            private static List<T> TransList<T>(SqlDataReader reader) where T : new()
            {
                List<T> tList = new List<T>();
                PropertyInfo[] properties = typeof(T).GetProperties();
                while (reader.Read())
                {
                    T t = new T();
                    FillFromRow(t, properties, reader);
                    tList.Add(t);
                }
                return tList;
            }

            /// <summary>
            /// Maps the reader to a single <typeparamref name="T"/>. When several rows are
            /// returned, values from later rows overwrite earlier ones; with no rows a
            /// default-constructed object is returned.
            /// </summary>
            private static T TransModel<T>(SqlDataReader reader) where T : new()
            {
                T t = new T();
                PropertyInfo[] properties = typeof(T).GetProperties();
                while (reader.Read())
                {
                    FillFromRow(t, properties, reader);
                }
                return t;
            }
            #endregion Private
        }
    }
    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    using Crawler.Model;
    
    namespace Crawler.DataService
    {
        /// <summary>
        /// Writes crawl warnings for a category to text files under
        /// <c>ObjectFactory.DataPath</c>.
        /// </summary>
        public class WarnRepository //: IRepository<Commodity>
        {
            private Logger logger = new Logger(typeof(WarnRepository));

            /// <summary>
            /// Appends the message and the category (as JSON) to
            /// <c>warn/{level}/{parentCode}/{categoryId}.txt</c> under the data path.
            /// Failures are logged and never propagated to the caller.
            /// </summary>
            /// <param name="category">Category the warning refers to; determines the file location.</param>
            /// <param name="msg">Warning text, written on its own line.</param>
            public void SaveWarn(Category category, string msg)
            {
                try
                {
                    string recordFileName = string.Format("warn/{0}/{1}/{2}.txt", category.CategoryLevel, category.ParentCode, category.Id);
                    string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);

                    // CreateDirectory is a no-op when the directory already exists, and
                    // AppendText creates the file when it is missing.
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    using (StreamWriter sw = File.AppendText(totolPath))
                    {
                        sw.WriteLine(msg);
                        // Serialize once: serializing the JSON text again produced a quoted,
                        // backslash-escaped string instead of a JSON object, unlike the other
                        // repositories' records.
                        sw.WriteLine(JsonConvert.SerializeObject(category));
                    }
                }
                catch (Exception e)
                {
                    logger.Error("SaveWarn出现异常", e);
                }
            }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.Model
    {
        /// <summary>
        /// Base class of the crawler's data models; contributes the shared <see cref="Id"/> key.
        /// </summary>
        public class BaseModel
        {
            /// <summary>
            /// Record identifier. The tables created by DBInit declare Id as an IDENTITY
            /// column, and SqlHelper's insert helpers skip a property named "Id".
            /// </summary>
            public int Id { get; set; }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.Model
    {
        /// <summary>
        /// A product category found by the crawler; its properties match the columns of the
        /// <c>Category</c> table.
        /// </summary>
        public class Category:BaseModel
        {
            /// <summary>Hierarchical code assigned by CategorySearch (e.g. "01f" for a first-level category).</summary>
            public string Code { get; set; }
            /// <summary>Code of the parent category; CategorySearch uses "root" for first-level categories.</summary>
            public string ParentCode { get; set; }
            /// <summary>Display name taken from the page text.</summary>
            public string Name { get; set; }
            /// <summary>Link of the category page; may be empty or null when the page provides none.</summary>
            public string Url { get; set; }
            /// <summary>Depth in the hierarchy (CategorySearch assigns 1 to first-level and 2 to second-level categories).</summary>
            public int CategoryLevel { get; set; }
            /// <summary>Status flag; CategorySearch initialises it to 0. NOTE(review): meaning of other values is not visible here.</summary>
            public int State { get; set; }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler.Model
    {
        /// <summary>
        /// A crawled product; its properties match the columns of the
        /// <c>JD_Commodity_xxx</c> tables.
        /// </summary>
        public class Commodity : BaseModel
        {
            /// <summary>Product identifier; CommodityRepository derives the target table from it.</summary>
            public long ProductId { get; set; }
            /// <summary>Identifier of the category the product was found in (presumably <c>Category.Id</c> — confirm against the crawler).</summary>
            public int CategoryId { get; set; }
            /// <summary>Product title.</summary>
            public string Title { get; set; }
            /// <summary>Product price.</summary>
            public decimal Price { get; set; }
            /// <summary>Link of the product page.</summary>
            public string Url { get; set; }
            /// <summary>Link of the product image.</summary>
            public string ImageUrl { get; set; }
        }
    
        //jQuery5427073([{"id":"J_1707419","p":"5149.00","m":"5499.00"},{"id":"J_1589214","p":"1999.00","m":"2999.00"},{"id":"J_1546310","p":"3999.00","m":"4999.00"},{"id":"J_1510479","p":"2999.00","m":"3569.00"},{"id":"J_1707420","p":"4149.00","m":"4499.00"},{"id":"J_1770620","p":"2099.00","m":"2499.00"},{"id":"J_1258277","p":"2699.00","m":"3299.00"},{"id":"J_1707423","p":"4599.00","m":"4705.00"},{"id":"J_1252778","p":"3099.00","m":"4199.00"},{"id":"J_1553732","p":"3298.00","m":"4598.00"},{"id":"J_1576022","p":"2999.00","m":"3999.00"},{"id":"J_1420120","p":"1999.00","m":"2899.00"},{"id":"J_647948","p":"1299.00","m":"1698.00"},{"id":"J_1044476","p":"1999.00","m":"2999.00"},{"id":"J_1376591","p":"1299.00","m":"1599.00"},{"id":"J_1416294","p":"4599.00","m":"5898.00"},{"id":"J_1455427","p":"1499.00","m":"1999.00"},{"id":"J_1253502","p":"2799.00","m":"3999.00"},{"id":"J_1553624","p":"2998.00","m":"4398.00"},{"id":"J_1301951","p":"2279.00","m":"3999.00"},{"id":"J_1115374","p":"2499.00","m":"4299.00"},{"id":"J_671315","p":"1999.00","m":"2898.00"},{"id":"J_1283945","p":"3099.00","m":"4199.00"},{"id":"J_1283940","p":"2499.00","m":"2999.00"},{"id":"J_1027317","p":"2799.00","m":"5999.00"},{"id":"J_1314962","p":"3699.00","m":"5199.00"},{"id":"J_1565150","p":"4068.00","m":"5727.00"},{"id":"J_1565175","p":"3788.00","m":"5377.00"},{"id":"J_1565182","p":"3938.00","m":"5757.00"},{"id":"J_1209084","p":"3599.00","m":"4999.00"}]);
        /// <summary>
        /// One entry of the JSONP price response shown in the sample comment above this class;
        /// property names mirror the JSON keys.
        /// </summary>
        public class CommodityPrice
        {
            /// <summary>Entry identifier as it appears in the response, e.g. "J_1707419".</summary>
            public string id { get; set; }
            /// <summary>The "p" value of the entry (presumably the current price — confirm).</summary>
            public decimal p { get; set; }
            /// <summary>The "m" value of the entry (presumably the market/list price — confirm).</summary>
            public decimal m { get; set; }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler
    {
        /// <summary>
        /// Downloads page content over HTTP with <see cref="HttpWebRequest"/>.
        /// Reference: http://tool.sufeinet.com/HttpHelper.aspx
        /// </summary>
        public class HttpHelper
        {
            private static Logger logger = new Logger(typeof(HttpHelper));

            /// <summary>
            /// Downloads the content of <paramref name="url"/>, decoding it as UTF-8
            /// (GB2312 was used previously).
            /// </summary>
            /// <param name="url">Address to download.</param>
            /// <returns>See <see cref="DownloadHtml"/>.</returns>
            public static string DownloadUrl(string url)
            {
                return DownloadHtml(url, Encoding.UTF8);
            }

            /// <summary>
            /// Downloads the content of <paramref name="url"/> and decodes it with
            /// <paramref name="encode"/>. If the text comes back garbled, try the other of
            /// UTF-8 / GB2312. Never throws; every failure is logged.
            /// </summary>
            /// <param name="url">Address to download.</param>
            /// <param name="encode">Encoding used to decode the response body.</param>
            /// <returns>
            /// The response text on success; an empty string when the status is not 200 OK or a
            /// <see cref="WebException"/> other than a 306 response occurs; null on a 306
            /// response, a read failure, or any other exception.
            /// </returns>
            public static string DownloadHtml(string url, Encoding encode)
            {
                string html = string.Empty;
                try
                {
                    HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest;// build the request
                    request.Timeout = 30 * 1000;// 30 s timeout
                    request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";
                    request.ContentType = "text/html; charset=utf-8";// "text/html;charset=gbk";// 

                    using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)// send the request
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                        {
                            logger.Warn(string.Format("抓取{0}地址返回失败,response.StatusCode为{1}", url, response.StatusCode));
                        }
                        else
                        {
                            try
                            {
                                // using guarantees the reader is released even when ReadToEnd fails
                                using (StreamReader sr = new StreamReader(response.GetResponseStream(), encode))
                                {
                                    html = sr.ReadToEnd();// read the body
                                }
                            }
                            catch (Exception ex)
                            {
                                logger.Error(string.Format("DownloadHtml抓取{0}保存失败", url), ex);
                                html = null;
                            }
                        }
                    }
                }
                catch (System.Net.WebException ex)
                {
                    // Detect 306 by status code as well: the message text is localized and only
                    // matches on a Chinese-language runtime.
                    HttpWebResponse errorResponse = ex.Response as HttpWebResponse;
                    bool is306 = errorResponse != null && (int)errorResponse.StatusCode == 306;
                    if (is306 || ex.Message.Equals("远程服务器返回错误: (306)。"))
                    {
                        logger.Error("远程服务器返回错误: (306)。", ex);
                        return null;
                    }
                    // Other web errors (timeouts, 4xx/5xx, DNS failures) used to be swallowed
                    // silently; the return value is unchanged, but the failure is now logged.
                    logger.Error(string.Format("DownloadHtml抓取{0}出现异常", url), ex);
                }
                catch (Exception ex)
                {
                    logger.Error(string.Format("DownloadHtml抓取{0}出现异常", url), ex);
                    html = null;
                }
                return html;
            }
        }
    }
    using System;
    using System.IO;
    using System.Diagnostics;
    using System.Configuration;
    using log4net.Config;
    using log4net;
    using Newtonsoft.Json;
    using System.Collections.Generic;
    
    namespace Crawler
    {
        /// <summary>
        /// Thin wrapper around log4net that also echoes every message to the console.
        /// </summary>
        public class Logger
        {
            /// <summary>
            /// Configures log4net once from <c>CfgFiles/log4net.cfg.xml</c> under the
            /// application base directory.
            /// </summary>
            static Logger()
            {
                // Path segments are passed separately: the previous literal "CfgFiles\log4net.cfg.xml"
                // contained the invalid escape sequence "\l" and did not compile.
                XmlConfigurator.Configure(new FileInfo(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "CfgFiles", "log4net.cfg.xml")));
                ILog Log = LogManager.GetLogger(typeof(Logger));
                Log.Info("系统初始化Logger模块");
            }

            private ILog loger = null;

            /// <summary>
            /// Creates a logger named after <paramref name="type"/>.
            /// </summary>
            /// <param name="type">Type whose name identifies the log source.</param>
            public Logger(Type type)
            {
                loger = LogManager.GetLogger(type);
            }

            /// <summary>
            /// Writes the message to the console and logs it, with the exception, at Error level.
            /// </summary>
            /// <param name="msg">Message text.</param>
            /// <param name="ex">Exception to attach; may be null.</param>
            public void Error(string msg = "出现异常", Exception ex = null)
            {
                Console.WriteLine(msg);
                loger.Error(msg, ex);
            }

            /// <summary>
            /// Writes the message to the console and logs it at Warn level.
            /// </summary>
            /// <param name="msg">Message text.</param>
            public void Warn(string msg)
            {
                Console.WriteLine(msg);
                loger.Warn(msg);
            }

            /// <summary>
            /// Writes the message to the console and logs it at Info level.
            /// </summary>
            /// <param name="msg">Message text.</param>
            public void Info(string msg)
            {
                Console.WriteLine(msg);
                loger.Info(msg);
            }

            /// <summary>
            /// Writes the message to the console and logs it at Debug level.
            /// </summary>
            /// <param name="msg">Message text.</param>
            public void Debug(string msg )
            {
                Console.WriteLine(msg);
                loger.Debug(msg);
            }


        }
    }
    using HtmlAgilityPack;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    using Newtonsoft.Json;
    
    using Crawler.Model;
    
    namespace Crawler
    {
        public class CategorySearch
        {
    
            private static Logger logger = new Logger(typeof(CategorySearch));
            private static int count = 1;
    
            /// <summary>
            /// Downloads the category page at <paramref name="url"/> and extracts the category
            /// tree from it. Never throws: failures are logged, and the categories collected
            /// up to that point are returned.
            /// </summary>
            /// <param name="url">Address of the page listing all categories.</param>
            /// <returns>The categories found; empty when the page could not be loaded or parsed.</returns>
            public static List<Category> Crawler(string url)
            {
                List<Category> categoryList = new List<Category>();
                try
                {
                    string html = HttpHelper.DownloadUrl(url);
                    // DownloadUrl reports failures as null or an empty string; say so explicitly
                    // instead of letting the parser fail with a generic exception.
                    if (string.IsNullOrWhiteSpace(html))
                    {
                        logger.Warn(string.Format("CategorySearch.Crawler抓取{0}未获取到html", url));
                        return categoryList;
                    }

                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    string fristPath = "//*[@class='category-item m']";
                    HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(fristPath);
                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    if (nodeList == null)
                    {
                        logger.Warn(string.Format("CategorySearch.Crawler在{0}中未找到一级类别节点", url));
                        return categoryList;
                    }

                    int k = 1;
                    foreach (HtmlNode node in nodeList)
                    {
                        // First-level codes are "01f", "02f", ... under the parent code "root".
                        categoryList.AddRange(First(node.InnerHtml, k++.ToString("00") + "f", "root"));
                    }

                }
                catch (Exception ex)
                {
                    logger.Error("CrawlerMuti出现异常", ex);
                }
                return categoryList;
            }
    
            /// <summary>
            /// Extracts one first-level category block: the first-level category itself
            /// (from its heading) followed by all second- and third-level categories below it.
            /// </summary>
            /// <param name="html">Inner HTML of the first-level category block.</param>
            /// <param name="code">Code assigned to the first-level category.</param>
            /// <param name="parentCode">Code of the parent ("root" for first-level categories).</param>
            /// <returns>The categories found; empty when the block has no heading.</returns>
            private static List<Category> First(string html, string code, string parentCode)
            {
                List<Category> categoryList = new List<Category>();
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                string path = "//*[@class='mt']/h2/span";
                HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(path);
                // SelectNodes returns null when nothing matches. Skip a block without a heading
                // instead of throwing, which would abort the crawl of all remaining blocks.
                if (nodeList == null || nodeList.Count == 0)
                {
                    logger.Warn(string.Format("CategorySearch.First未找到一级类别标题,code={0}", code));
                    return categoryList;
                }
                foreach (HtmlNode node in nodeList)
                {
                    Category category = new Category()
                    {
                        Id = count++,
                        State = 0,
                        CategoryLevel = 1,
                        Code = code,
                        ParentCode = parentCode
                    };
                    category.Name = node.InnerText;
                    category.Url = "";// node.Attributes["href"].Value;
                    categoryList.Add(category);
                }
                categoryList.AddRange(Second(html, code));
                return categoryList;
            }
    
            /// <summary>
            /// Extracts all second-level categories of one first-level block, each followed
            /// by its third-level categories.
            /// </summary>
            /// <param name="html">Inner HTML of the first-level category block.</param>
            /// <param name="parentCode">Code of the owning first-level category.</param>
            /// <returns>The categories found; empty when the block lists no items.</returns>
            private static List<Category> Second(string html, string parentCode)
            {
                List<Category> categoryList = new List<Category>();
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                string path = "//*[@class='items']/dl";
                HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(path);
                // SelectNodes returns null when nothing matches (same guard as in Third).
                if (nodeList == null || nodeList.Count == 0) return categoryList;
                int k = 1;
                foreach (HtmlNode node in nodeList)
                {
                    string secondHtml = node.InnerHtml;
                    if (string.IsNullOrWhiteSpace(secondHtml)) continue;
                    HtmlDocument secondDoc = new HtmlDocument();
                    secondDoc.LoadHtml(secondHtml);

                    HtmlNode secondNode = secondDoc.DocumentNode.SelectSingleNode("//dt/a");
                    if (secondNode == null)// some sections (books / audio-video) have a plain <dt> without a link
                    {
                        secondNode = secondDoc.DocumentNode.SelectSingleNode("//dt");
                    }
                    // Without any <dt> there is nothing to name the category from; skip the entry.
                    if (secondNode == null) continue;

                    // Consume the sequence number as soon as the category is emitted. It used to be
                    // incremented only after the third-level lookup, so an entry without <dd>
                    // shared its code with the next sibling.
                    string code = string.Format("{0}{1}s", parentCode, k.ToString("00"));
                    k++;

                    Category category = new Category()
                    {
                        Id = count++,
                        State = 0,
                        CategoryLevel = 2,
                        Code = code,
                        ParentCode = parentCode
                    };
                    category.Name = secondNode.InnerText;
                    if (secondNode.Attributes["href"] != null)
                    {
                        category.Url = secondNode.Attributes["href"].Value;
                        // Add a scheme to links that lack one (e.g. "//host/path"), but leave
                        // https links alone; they used to become "http:https://...".
                        if (!category.Url.StartsWith("http:", StringComparison.OrdinalIgnoreCase)
                            && !category.Url.StartsWith("https:", StringComparison.OrdinalIgnoreCase))
                        {
                            category.Url = string.Concat("http:", category.Url);
                        }
                    }
                    categoryList.Add(category);

                    HtmlNode thirdNode = secondDoc.DocumentNode.SelectSingleNode("//dd");
                    if (thirdNode == null) continue;
                    categoryList.AddRange(Third(thirdNode.InnerHtml, code));
                }
                return categoryList;
            }
    
            /// <summary>
            /// Turns every anchor found in the HTML of one second-level category into a
            /// third-level category.
            /// </summary>
            /// <param name="html">HTML fragment holding the third-level links.</param>
            /// <param name="parentCode">Code of the owning second-level category; prefix of every generated code.</param>
            /// <returns>The third-level categories in document order; an empty list when there are no anchors.</returns>
            private static List<Category> Third(string html, string parentCode)
            {
                List<Category> categoryList = new List<Category>();
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes("//a");
                if (nodeList == null || nodeList.Count == 0) return categoryList;

                int k = 1;
                foreach (HtmlNode node in nodeList)
                {
                    Category category = new Category()
                    {
                        Id = count++,
                        State = 0,
                        CategoryLevel = 3,
                        Code = string.Format("{0}{1}t", parentCode, k.ToString("00")),
                        ParentCode = parentCode,
                        Name = node.InnerText
                    };

                    // An anchor without href used to throw here; keep the category and leave
                    // Url unset, which is how Second treats a heading without a link.
                    var href = node.Attributes["href"];
                    if (href != null)
                    {
                        category.Url = href.Value;
                        // Only add a scheme when none is present, so https links are left intact.
                        bool hasScheme = category.Url.StartsWith("http:", StringComparison.OrdinalIgnoreCase)
                                      || category.Url.StartsWith("https:", StringComparison.OrdinalIgnoreCase);
                        if (!hasScheme)
                        {
                            category.Url = string.Concat("http:", category.Url);
                        }
                    }

                    categoryList.Add(category);
                    k++;
                }
                return categoryList;
            }
        }
    }
    using HtmlAgilityPack;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    using Newtonsoft.Json;
    using Crawler.DataService;
    
    using Crawler.Model;
    
    namespace Crawler
    {
        public class CommoditySearch
        {
            private readonly Logger logger = new Logger(typeof(CommoditySearch));
            private readonly WarnRepository warnRepository = new WarnRepository();
            private readonly CommodityRepository commodityRepository = new CommodityRepository();
            // The category whose listing pages this instance crawls; fixed for the instance's lifetime.
            private readonly Category category;

            /// <summary>
            /// Creates a searcher bound to a single category.
            /// </summary>
            /// <param name="_category">Category to crawl; must not be null.</param>
            /// <exception cref="ArgumentNullException"><paramref name="_category"/> is null.</exception>
            public CommoditySearch(Category _category)
            {
                // Fail at construction instead of with a NullReferenceException in the middle of a crawl.
                if (_category == null) throw new ArgumentNullException("_category");
                category = _category;
            }
    
            /// <summary>
            /// Crawls every listing page of the category given to the constructor.
            /// The first page is downloaded to read the page count from the <c>J_topPage</c> element,
            /// then each page is fetched and parsed through GetCommodityList.
            /// Problems are reported through the warn repository and the logger; no exception
            /// raised inside the try block leaves this method.
            /// </summary>
            public void Crawler()
            {
                try
                {
                    if (string.IsNullOrEmpty(category.Url))
                    {
                        warnRepository.SaveWarn(category, string.Format("Url为空,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
                        return;
                    }
                    string html = HttpHelper.DownloadUrl(category.Url);

                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    HtmlNode pageNumberNode = doc.DocumentNode.SelectSingleNode(@"//*[@id='J_topPage']/span/i");
                    if (pageNumberNode == null)
                    {
                        // Without a pager there is nothing to iterate; record it instead of silently doing nothing.
                        warnRepository.SaveWarn(category, string.Format("分页数据为空,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
                        return;
                    }

                    // Parse once; a non-numeric value throws and is handled by the outer catch.
                    int pageCount = int.Parse(pageNumberNode.InnerText);
                    for (int i = 1; i <= pageCount; i++)
                    {
                        string pageUrl = string.Format("{0}&page={1}", category.Url, i);
                        try
                        {
                            // If the category URL already carries "&page=1&", point that at the current page too.
                            // NOTE: the result is not persisted; the call to commodityRepository.SaveList
                            // was disabled in the original code and is left that way.
                            GetCommodityList(category, pageUrl.Replace("&page=1&", string.Format("&page={0}&", i)));
                        }
                        catch (Exception ex) // a failure on one page must not stop the remaining pages
                        {
                            logger.Error("Crawler的commodityRepository.SaveList(commodityList)出现异常", ex);
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error("CrawlerMuti出现异常", ex);
                    warnRepository.SaveWarn(category, string.Format("出现异常,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
                }
            }
    
            /// <summary>
            /// Downloads one listing page, extracts the products found under <c>#plist</c>
            /// and passes them to GetCommodityPrice to have their prices filled in.
            /// An item that lacks a link, a numeric id, a title or an image is skipped on its own;
            /// it no longer aborts the rest of the page.
            /// </summary>
            /// <param name="category">Category the page belongs to; its Id is stored on every product.</param>
            /// <param name="url">Address of the listing page.</param>
            /// <returns>The products parsed from the page (possibly empty).</returns>
            private List<Commodity> GetCommodityList(Category category, string url)
            {
                string html = HttpHelper.DownloadUrl(url);
                List<Commodity> commodityList = new List<Commodity>();
                try
                {
                    if (string.IsNullOrEmpty(html)) return commodityList;
                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes("//*[@id='plist']/ul/li");
                    if (noneNodeList == null || noneNodeList.Count == 0)
                    {
                        warnRepository.SaveWarn(category, string.Format("GetCommodityList商品数据为空,Name={0} Level={1} category.Url={2} url={3}", category.Name, category.CategoryLevel, category.Url, url));
                        return commodityList;
                    }
                    foreach (var node in noneNodeList)
                    {
                        // Each <li> is re-parsed on its own so the "//" queries below only see this item.
                        HtmlDocument docChild = new HtmlDocument();
                        docChild.LoadHtml(node.OuterHtml);

                        HtmlNode urlNode = docChild.DocumentNode.SelectSingleNode("//*[@class='p-name']/a");
                        if (urlNode == null || urlNode.Attributes["href"] == null)
                        {
                            continue;
                        }
                        string productUrl = EnsureHttpScheme(urlNode.Attributes["href"].Value);

                        // The product id is the file name of the detail page, e.g. ".../123456.html".
                        long productId;
                        string sId = Path.GetFileName(productUrl).Replace(".html", "");
                        if (!long.TryParse(sId, out productId))
                        {
                            continue;
                        }

                        HtmlNode titleNode = docChild.DocumentNode.SelectSingleNode("//*[@class='p-name']/a/em");
                        if (titleNode == null)
                        {
                            continue;
                        }

                        HtmlNode imageNode = docChild.DocumentNode.SelectSingleNode("//*[@class='p-img']/a/img");
                        if (imageNode == null)
                        {
                            continue;
                        }
                        // The image address is read from the first of these attributes that is present.
                        string imageUrl;
                        if (imageNode.Attributes.Contains("src"))
                            imageUrl = imageNode.Attributes["src"].Value;
                        else if (imageNode.Attributes.Contains("original"))
                            imageUrl = imageNode.Attributes["original"].Value;
                        else if (imageNode.Attributes.Contains("data-lazy-img"))
                            imageUrl = imageNode.Attributes["data-lazy-img"].Value;
                        else
                            continue;

                        commodityList.Add(new Commodity()
                        {
                            CategoryId = category.Id,
                            Url = productUrl,
                            ProductId = productId,
                            Title = titleNode.InnerText,
                            ImageUrl = EnsureHttpScheme(imageUrl)
                        });
                    }
                    Console.WriteLine("{0}一共获取了{1}条数据", url, commodityList.Count);
                }
                catch (Exception ex)
                {
                    logger.Error(string.Format("GetCommodityList出现异常,url={0}", url), ex);
                }
                return GetCommodityPrice(category, commodityList);
            }

            /// <summary>
            /// Prefixes "http:" to an address that has no http/https scheme; addresses that
            /// already carry one (including https) are returned unchanged.
            /// </summary>
            private static string EnsureHttpScheme(string url)
            {
                bool hasScheme = url.StartsWith("http:", StringComparison.OrdinalIgnoreCase)
                              || url.StartsWith("https:", StringComparison.OrdinalIgnoreCase);
                return hasScheme ? url : "http:" + url;
            }
    
            /// <summary>
            /// Requests the prices of all given products in a single call to the price service
            /// and copies each returned price onto the matching product.
            /// Products without a price entry keep their current Price. Any failure is logged
            /// and the list is returned as far as it was processed.
            /// </summary>
            /// <param name="category">Not used by this method; kept for signature compatibility.</param>
            /// <param name="commodityList">Products to price; may be null or empty.</param>
            /// <returns>The same list instance that was passed in.</returns>
            private List<Commodity> GetCommodityPrice(Category category, List<Commodity> commodityList)
            {
                try
                {
                    if (commodityList == null || commodityList.Count == 0)
                        return commodityList;

                    string skuIds = string.Join("%2C", commodityList.Select(c => string.Format("J_{0}", c.ProductId)));
                    string priceUrl = string.Format("http://p.3.cn/prices/mgets?callback=jQuery1069298&type=1&area=1_72_4137_0&skuIds={0}&pdbp=0&pdtk=&pdpin=&pduid=1945966343&_=1469022843655", skuIds);
                    string html = HttpHelper.DownloadUrl(priceUrl);
                    if (string.IsNullOrWhiteSpace(html))
                    {
                        logger.Warn(string.Format("获取url={0}时获取的html为空", priceUrl));
                        // Nothing to parse; previously execution continued and dereferenced the empty response.
                        return commodityList;
                    }

                    // The response is wrapped as callback( ... ); keep only what lies between
                    // the first "(" and the last ")".
                    int start = html.IndexOf('(') + 1;
                    int end = html.LastIndexOf(')');
                    if (end < start)
                    {
                        logger.Warn(string.Format("Unexpected price response for url={0}", priceUrl));
                        return commodityList;
                    }
                    string json = html.Substring(start, end - start);

                    List<CommodityPrice> priceList = JsonConvert.DeserializeObject<List<CommodityPrice>>(json);
                    if (priceList == null)
                        return commodityList;

                    // Index the prices once; the first entry per id wins, as with FirstOrDefault before.
                    var pricesBySku = priceList.Where(p => p != null && p.id != null).ToLookup(p => p.id);
                    foreach (Commodity commodity in commodityList)
                    {
                        var price = pricesBySku[string.Format("J_{0}", commodity.ProductId)].FirstOrDefault();
                        if (price != null)
                        {
                            commodity.Price = price.p;
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error("GetCommodityPrice出现异常", ex);
                }
                return commodityList;
            }
        }
    }
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading;
    using System.Threading.Tasks;
    
    using Newtonsoft.Json;
    using Crawler.DataService;
    using Crawler.Model;
    
    namespace Crawler
    {
        /// <summary>
        /// 1 爬虫,爬虫攻防
        /// 2 下载html
        /// 3 xpath解析html,获取数据和深度抓取
        /// 4 不一样的属性和ajax数据的获取
        /// 5 多线程爬虫
        /// </summary>
        class Program
        {
            private static Logger logger = new Logger(typeof(Program));
            /// <summary>
            /// Console entry point. Every crawl step below is currently commented out;
            /// uncomment the one to run. The window stays open until Enter is pressed.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                try
                {
                    Console.WriteLine("今天是爬虫的学习");

                    //// Test: download a listing page
                    //string html = HttpHelper.DownloadHtml(@"http://list.jd.com/list.html?cat=9987,653,655", Encoding.UTF8);

                    //// Test: download the category overview page
                    //string html1 = HttpHelper.DownloadHtml("http://www.jd.com/allSort.aspx", Encoding.UTF8);

                    //// Test: crawl the product list of a single category
                    //string testCategory = "{\"Id\":73,\"Code\":\"02f01s01T\",\"ParentCode\":\"02f01s\",\"Name\":\"烟机/灶具\",\"Url\":\"http://list.jd.com/list.html?cat=737,13297,1300\",\"Level\":3}";
                    //new CommoditySearch(JsonConvert.DeserializeObject<Category>(testCategory)).Crawler();

                    //// Test: crawl the category tree
                    //List<Category> categoryList = CategorySearch.Crawler("http://www.jd.com/allSort.aspx");

                    //// Full interactive run
                    //Crawl();
                }
                catch (Exception ex)
                {
                    logger.Error("异常啦,", ex);
                    Console.WriteLine("*****************木有成功**********************");
                }
                // One pause serves both outcomes; the failure path used to wait for Enter twice.
                Console.ReadLine();
            }
    
            /// <summary>
            /// Interactive crawl. Asks whether to rebuild the Category table and crawl the
            /// categories, then whether to rebuild the commodity tables and crawl the products.
            /// Only an answer of "Y" (any case) confirms a step; anything else, including
            /// end of input, skips it.
            /// </summary>
            private static void Crawl()
            {
                DBInit dbInit = new DBInit();
                CategoryRepository categoryRepository = new CategoryRepository();
                Console.WriteLine("请输入Y/N进行类别表初始化确认! Y 删除Category表然后重新创建,然后抓取类型数据,N(或者其他)跳过");
                string input = Console.ReadLine();
                // Console.ReadLine returns null when input is exhausted; the static Equals tolerates that.
                if (string.Equals(input, "Y", StringComparison.OrdinalIgnoreCase))
                {
                    dbInit.InitCategoryTable();
                    List<Category> categoryList = CategorySearch.Crawler("http://www.jd.com/allSort.aspx");

                    categoryRepository.Save(categoryList);
                    Console.WriteLine("类型数据初始化完成,共抓取类别{0}个", categoryList.Count);
                }
                else
                {
                    Console.WriteLine("你选择不初始化类别数据");
                }
                Console.WriteLine("*****************^_^**********************");

                Console.WriteLine("请输入Y/N进行商品数据初始化确认! Y 删除全部商品表表然后重新创建,然后抓取商品数据,N(或者其他)跳过");
                input = Console.ReadLine();
                if (string.Equals(input, "Y", StringComparison.OrdinalIgnoreCase))
                {
                    dbInit.InitCommodityTable();
                    CrawlerCommodity();
                }
                Console.WriteLine("*****************^_^**********************");
            }
    
            /// <summary>
            /// Crawls the products of every level-3 category, one task per category.
            /// Once more than 15 tasks are being tracked, finished ones are dropped and the
            /// loop waits for any remaining task before starting the next category.
            /// When all tasks are done, CleanAll is run.
            /// </summary>
            private static void CrawlerCommodity()
            {
                Console.WriteLine("{0} jd商品开始抓取 - -", DateTime.Now);
                List<Category> thirdLevelCategories = new CategoryRepository().QueryListByLevel(3);

                List<Task> pending = new List<Task>();
                TaskFactory factory = new TaskFactory();
                foreach (Category current in thirdLevelCategories)
                {
                    pending.Add(factory.StartNew(new CommoditySearch(current).Crawler));
                    if (pending.Count <= 15)
                    {
                        continue;
                    }

                    // Forget tasks that have already ended, then block until one more ends.
                    pending.RemoveAll(t => t.IsCompleted || t.IsCanceled || t.IsFaulted);
                    Task.WaitAny(pending.ToArray());
                }

                Task.WaitAll(pending.ToArray());
                Console.WriteLine("{0} jd商品抓取全部完成 - -", DateTime.Now);
                CleanAll();
            }
    
            /// <summary>
            /// Removes duplicate rows from the tables JD_Commodity_001 .. JD_Commodity_030.
            /// For every (productid, CategoryId) pair that occurs more than once, only the row
            /// with the highest ID is kept. All statements are sent as one batch; a failure is
            /// logged and not rethrown.
            /// </summary>
            private static void CleanAll()
            {
                const int tableCount = 30;
                try
                {
                    Console.WriteLine("{0} 开始清理重复数据 - -", DateTime.Now);
                    StringBuilder sb = new StringBuilder();
                    for (int i = 1; i <= tableCount; i++)
                    {
                        // The keep-set is the newest row of EVERY (productid, CategoryId) group.
                        // It used to list only groups having duplicates, so a row that was unique
                        // in its own category was deleted whenever the same productid was
                        // duplicated in another category.
                        sb.AppendFormat(@"DELETE FROM [dbo].[JD_Commodity_{0}] where productid IN(select productid from [dbo].[JD_Commodity_{0}] group by productid,CategoryId having count(0)>1)
                                    AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_{0}] group by productid,CategoryId);", i.ToString("000"));
                    }
                    string sql = sb.ToString();
                    Console.WriteLine("执行清理sql:{0}", sql);
                    SqlHelper.ExecuteNonQuery(sql);
                    Console.WriteLine("{0} 完成清理重复数据 - -", DateTime.Now);
                }
                catch (Exception ex)
                {
                    logger.Error("CleanAll出现异常", ex);
                }
                finally
                {
                    Console.WriteLine("{0} 结束清理重复数据 - -", DateTime.Now);
                }
            }
        }
    }
  • 相关阅读:
    2012年"浪潮杯"山东省第三届ACM大学生程序设计竞赛 Fruit Ninja I
    HDU 1045
    ZOJ 3946 Highway Project
    python基础知识
    粘包问题以及解决方法
    socket套接字
    网络编程 互联网协议 tcp原理
    反射 魔法方法 单例模式
    classmethod与staticmethod isinstance与issubclass
    封装 多态
  • 原文地址:https://www.cnblogs.com/zhengqian/p/8655401.html
Copyright © 2011-2022 走看看