zoukankan      html  css  js  c++  java
  • Dooioo Deal

    using AnfleCrawler.Common;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer for dooioo.com pages. Depth 0 pages are listing pages whose
        /// links are queued as <see cref="DataDepth.Houses"/>; a Houses page is
        /// saved as a houses entity together with the transaction rows it shows;
        /// <see cref="DataDepth.Deal"/> pages contribute further transaction rows.
        /// </summary>
        internal class Dooioo : AnalyzerBase
        {
            // Number of <td> cells SaveHouselisting reads from each history row (indexes 0..5).
            private const int HistoryColumnCount = 6;

            /// <summary>
            /// Dispatches on <c>current.Depth</c> and processes the page accordingly.
            /// </summary>
            /// <param name="current">The page being analyzed. For Deal pages its
            /// <c>State</c> must hold the <see cref="Guid"/> that was passed to
            /// <c>Crawler.PushUrl</c> by the Houses branch.</param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 0:
                        {
                            var dom = lander.GetDocument(pHandler);
                            DoPerPaging(current, dom.DocumentNode, ".pagination a:last-child");

                            // Queue every link found under #hlist as a Houses page.
                            foreach (var node in QueryNodes(dom.DocumentNode, "#hlist a"))
                            {
                                var url = GetHref(node, current.Url);
                                Crawler.PushUrl(url, DataDepth.Houses);
                            }
                        }
                        break;
                    case DataDepth.Houses:
                        {
                            var dom = lander.GetDocument(pHandler);
                            var attrs = new AttributeFiller();

                            // Each #building-info item is turned into a "first span:second span"
                            // pair. Items with fewer than two spans are skipped instead of
                            // throwing IndexOutOfRangeException.
                            var Nset = QueryNodes(dom.DocumentNode, "#building-info li")
                                .Select(li => QueryTexts(li, "span").ToArray())
                                .Where(spans => spans.Length >= 2)
                                .Select(spans => string.Format("{0}:{1}", spans[0], spans[1]));
                            attrs.Append(Nset);

                            Guid hashKey = GenHashKey(current.Url.OriginalString);
                            var bo = Crawler.Repository.LoadHouses(hashKey);
                            bo.SiteID = current.Url.GetDomain();
                            bo.PageUrl = current.Url.OriginalString;
                            bo.CityName = Crawler.Config.CityName;
                            // Name mapping handed to FillEntity; presumably page label -> entity
                            // property name (the entity exposes 小区名称) - confirm against AttributeFiller.
                            attrs.FillEntity(bo, new Dictionary<string, string>() 
                            {
                                {"小区名", "小区名称"},
                                {"板块", "所属区域"},
                                {"建造年代", "竣工时间"},
                                {"地址", "小区地址"},
                                {"物业类型", "物业类别"},
                            });
                            MapMark(bo);
                            Repository.Save(bo);
                            Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                            // Materialize once: the original enumerated this query three times.
                            var pageLinks = QueryNodes(dom.DocumentNode, ".pagination a", false).ToArray();
                            if (pageLinks.Length > 0)
                            {
                                // The second-to-last pagination link supplies the page count
                                // (or the only link, when there is just one).
                                string pageCount = pageLinks[Math.Max(0, pageLinks.Length - 2)].InnerText;
                                // NOTE(review): the "s117862" segment is hard-coded, so every houses
                                // page queues the same deal URL - confirm whether it should be
                                // derived from current.Url.
                                Crawler.PushUrl(new Uri(string.Format("http://www.dooioo.com/ershoufang/s117862?p=[2-{0}]", pageCount)), DataDepth.Deal, bo.RowID);
                            }
                            SaveHouselisting(bo.RowID, current, dom);
                        }
                        break;
                    case DataDepth.Deal:
                        {
                            Guid housesID = (Guid)current.State;
                            pHandler.CrossLoad = (arg, xDom) =>
                            {
                                string pName = "p";
                                if (arg.IsRepost)
                                {
                                    // The flag was set by the click below; clear it and do nothing
                                    // more on this invocation.
                                    arg.IsRepost = false;
                                    return;
                                }
                                var query = System.Web.HttpUtility.ParseQueryString(arg.RequestUrl.Query);
                                int pageIndex;
                                if (!int.TryParse(query[pName], out pageIndex))
                                {
                                    pageIndex = 1;
                                }

                                // NOTE(review): WinForms HtmlElement may expose the CSS class as
                                // "className" rather than "class" - confirm this lookup matches.
                                var input = xDom.GetElementsByTagName("ul").Cast<System.Windows.Forms.HtmlElement>()
                                    .Where(p => p.GetAttribute("class").Contains("pagination")).FirstOrDefault();
                                if (input == null)
                                {
                                    App.LogInfo("CrossLoad xPaing:{0} {1}", this.GetType().Name, xDom.Body.InnerHtml);
                                    return;
                                }

                                // Find the link whose text equals the requested page number.
                                // FirstOrDefault avoids an InvalidOperationException when the
                                // page has no such link.
                                string pageText = pageIndex.ToString();
                                var btn = input.GetElementsByTagName("a").Cast<System.Windows.Forms.HtmlElement>()
                                    .FirstOrDefault(p => p.InnerText == pageText);
                                if (btn == null)
                                {
                                    App.LogInfo("CrossLoad page link not found:{0} {1}", this.GetType().Name, pageIndex);
                                    return;
                                }
                                btn.InvokeMember("click");
                                arg.IsRepost = true;
                            };
                            var dom = lander.GetDocument(pHandler);
                            SaveHouselisting(housesID, current, dom);
                        }
                        break;
                }
            }

            /// <summary>
            /// Saves one <c>HouselistingEntity</c> per row of <c>#history-list</c>.
            /// Cell order read from each row: 0 Apartment, 1 Area, 2 SoldPriceOrRent,
            /// 3 UnitPriceOrLease, 4 TransactionDate, 5 ServiceBroker.
            /// </summary>
            /// <param name="housesID">Identifier written to each record as HousesID.</param>
            /// <param name="current">The page being processed (not read by this method).</param>
            /// <param name="dom">Parsed document containing the history table.</param>
            private void SaveHouselisting(Guid housesID, PageLandEntity current, HtmlAgilityPack.HtmlDocument dom)
            {
                foreach (var node in QueryNodes(dom.DocumentNode, "#history-list tr"))
                {
                    var cells = QueryTexts(node, "td").ToArray();
                    if (cells.Length < HistoryColumnCount)
                    {
                        // Rows without the full set of cells (e.g. a header row) cannot
                        // be mapped; skip them rather than index out of range.
                        continue;
                    }

                    // A fresh filler per row so values from an earlier row (notably a
                    // TransactionDate that parsed there but not here) never leak into this one.
                    var attrs = new AttributeFiller();
                    attrs.Append("HousesID:{0}", housesID);

                    DateTime dump;
                    if (DateTime.TryParse(cells[4], out dump))
                    {
                        attrs.Append("TransactionDate:{0}", dump);
                    }

                    attrs.Append("SoldPriceOrRent:{0}", cells[2]);
                    attrs.Append("UnitPriceOrLease:{0}", cells[3]);
                    attrs.Append("Apartment:{0}", cells[0]);
                    attrs.Append("ServiceBroker:{0}", cells[5]);
                    attrs.Append("Area:{0}", cells[1]);

                    var bo = new HouselistingEntity();
                    attrs.FillEntity(bo);
                    Repository.SaveHouselisting(bo);
                    Crawler.OutWrite("保存小区出售记录 {0}", housesID);
                }
            }
        }
    }
  • 相关阅读:
    py.turtle学习笔记(简单图形绘制)
    eclipse Network Connections
    EntityFramework 6 使用注意事项汇总
    Web发展过程中的一些设计思想和软硬件系统构建方式的一段话
    Fody is only supported on MSBuild 16 and above. Current version: 15
    .net 程序优化的原则-C#语言元素相关
    .net 事务
    关于IIS部署网站后 浏览器HTTP 错误 404.7 请求筛选模块被配置为拒绝该文件扩展名。
    准备学习的书籍列表
    在本地搭建Git厂库并把自己得代码上传到远程厂库
  • 原文地址:https://www.cnblogs.com/Googler/p/4241258.html
Copyright © 2011-2022 走看看