zoukankan      html  css  js  c++  java
  • Dooioo Deal

    using AnfleCrawler.Common;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer for dooioo.com pages. <see cref="AnalyzeInternal"/> dispatches on the
        /// depth of the landed page: depth 0 queues the links found under "#hlist",
        /// <c>DataDepth.Houses</c> stores one houses entity plus the history rows on the page,
        /// and <c>DataDepth.Deal</c> stores the history rows of one further page.
        /// </summary>
        internal class Dooioo : AnalyzerBase
        {
            // SaveHouselisting reads cells 0..5 of every "#history-list" row, so a row
            // needs at least this many <td> cells to be usable.
            private const int HistoryColumnCount = 6;

            /// <summary>
            /// Processes one landed page according to <c>current.Depth</c>.
            /// </summary>
            /// <param name="current">The landed page; its Url, Depth and State are read here.</param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 0:
                        {
                            var dom = lander.GetDocument(pHandler);
                            DoPerPaging(current, dom.DocumentNode, ".pagination a:last-child");

                            // Every link under "#hlist" is queued as a houses page.
                            foreach (var node in QueryNodes(dom.DocumentNode, "#hlist a"))
                            {
                                var url = GetHref(node, current.Url);
                                Crawler.PushUrl(url, DataDepth.Houses);
                            }
                        }
                        break;
                    case DataDepth.Houses:
                        {
                            var dom = lander.GetDocument(pHandler);
                            var attrs = new AttributeFiller();

                            // Each "#building-info li" contributes a "name:value" pair built from its
                            // first two spans; items with fewer than two spans are skipped instead of
                            // failing with an IndexOutOfRangeException.
                            var infoPairs = QueryNodes(dom.DocumentNode, "#building-info li")
                                .Select(p => QueryTexts(p, "span").ToArray())
                                .Where(spans => spans.Length >= 2)
                                .Select(spans => string.Format("{0}:{1}", spans[0], spans[1]));
                            attrs.Append(infoPairs);

                            Guid hashKey = GenHashKey(current.Url.OriginalString);
                            var bo = Crawler.Repository.LoadHouses(hashKey);
                            bo.SiteID = current.Url.GetDomain();
                            bo.PageUrl = current.Url.OriginalString;
                            bo.CityName = Crawler.Config.CityName;
                            attrs.FillEntity(bo, new Dictionary<string, string>() 
                            {
                                {"小区名", "小区名称"},
                                {"板块", "所属区域"},
                                {"建造年代", "竣工时间"},
                                {"地址", "小区地址"},
                                {"物业类型", "物业类别"},
                            });
                            MapMark(bo);
                            Repository.Save(bo);
                            Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                            // Materialize once: the query result is inspected several times below.
                            var pageLinks = QueryNodes(dom.DocumentNode, ".pagination a", false).ToList();
                            if (pageLinks.Count > 0)
                            {
                                // The second-to-last pagination link carries the page count
                                // (falls back to the first link when there is only one).
                                string pageCount = pageLinks[Math.Max(pageLinks.Count - 2, 0)].InnerText;
                                // NOTE(review): the community id "s117862" is hard-coded, so every houses page
                                // queues deal pages for that same id — presumably this should be derived from
                                // current.Url; confirm before changing.
                                Crawler.PushUrl(new Uri(string.Format("http://www.dooioo.com/ershoufang/s117862?p=[2-{0}]", pageCount)), DataDepth.Deal, bo.RowID);
                            }
                            SaveHouselisting(bo.RowID, current, dom);
                        }
                        break;
                    case DataDepth.Deal:
                        {
                            // The houses row id was attached as state when this URL was pushed.
                            Guid housesID = (Guid)current.State;
                            pHandler.CrossLoad = (arg, xDom) =>
                            {
                                string pName = "p";
                                if (arg.IsRepost)
                                {
                                    // Second invocation after the click below: clear the flag and stop.
                                    arg.IsRepost = false;
                                    return;
                                }
                                var query = System.Web.HttpUtility.ParseQueryString(arg.RequestUrl.Query);
                                int pageIndex;
                                if (!int.TryParse(query[pName], out pageIndex))
                                {
                                    pageIndex = 1;
                                }

                                // NOTE(review): HtmlElement.GetAttribute("class") may return an empty string in
                                // some WebBrowser document modes ("className" is the usual spelling) — verify.
                                var input = xDom.GetElementsByTagName("ul").Cast<System.Windows.Forms.HtmlElement>()
                                    .Where(p => p.GetAttribute("class").Contains("pagination")).FirstOrDefault();
                                if (input == null)
                                {
                                    App.LogInfo("CrossLoad xPaing:{0} {1}", this.GetType().Name, xDom.Body.InnerHtml);
                                    return;
                                }

                                // Find the pagination anchor whose text equals the requested page number.
                                string pageText = pageIndex.ToString();
                                var btn = input.GetElementsByTagName("a").Cast<System.Windows.Forms.HtmlElement>()
                                    .FirstOrDefault(p => p.InnerText == pageText);
                                if (btn == null)
                                {
                                    // No anchor for this page: log and give up, like the missing-pagination case
                                    // above, instead of throwing InvalidOperationException from First().
                                    App.LogInfo("CrossLoad xPaing:{0} page {1} not found", this.GetType().Name, pageText);
                                    return;
                                }
                                btn.InvokeMember("click");
                                arg.IsRepost = true;
                            };
                            var dom = lander.GetDocument(pHandler);
                            SaveHouselisting(housesID, current, dom);
                        }
                        break;
                }
            }

            /// <summary>
            /// Saves one <c>HouselistingEntity</c> per usable row of the "#history-list" table.
            /// </summary>
            /// <param name="housesID">Id written to each saved entity as "HousesID".</param>
            /// <param name="current">The landed page (not read by this method).</param>
            /// <param name="dom">Parsed document containing the "#history-list" table.</param>
            private void SaveHouselisting(Guid housesID, PageLandEntity current, HtmlAgilityPack.HtmlDocument dom)
            {
                foreach (var node in QueryNodes(dom.DocumentNode, "#history-list tr"))
                {
                    var cells = QueryTexts(node, "td").ToArray();
                    if (cells.Length < HistoryColumnCount)
                    {
                        // Rows without the full set of cells cannot be mapped; skip them
                        // rather than abort the whole page with an index error.
                        continue;
                    }

                    // A fresh filler per row, so a value appended for one row (e.g. a
                    // TransactionDate) cannot carry over into a later row that lacks it.
                    var attrs = new AttributeFiller();
                    attrs.Append("HousesID:{0}", housesID);

                    DateTime dump;
                    if (DateTime.TryParse(cells[4], out dump))
                    {
                        attrs.Append("TransactionDate:{0}", dump);
                    }

                    attrs.Append("SoldPriceOrRent:{0}", cells[2]);
                    attrs.Append("UnitPriceOrLease:{0}", cells[3]);
                    attrs.Append("Apartment:{0}", cells[0]);
                    attrs.Append("ServiceBroker:{0}", cells[5]);
                    attrs.Append("Area:{0}", cells[1]);

                    var bo = new HouselistingEntity();
                    attrs.FillEntity(bo);
                    Repository.SaveHouselisting(bo);
                    Crawler.OutWrite("保存小区出售记录 {0}", housesID);
                }
            }
        }
    }
  • 相关阅读:
    prototype.js超强的javascript类库
    MySQL Server Architecture
    Know more about RBA redo block address
    MySQL无处不在
    利用Oracle Enterprise Manager Cloud Control 12c创建DataGuard Standby
    LAMP Stack
    9i中DG remote archive可能导致Primary Database挂起
    Oracle数据库升级与补丁
    Oracle为何会发生归档日志archivelog大小远小于联机重做日志online redo log size的情况?
    Oracle Ksplice如何工作?How does Ksplice work?
  • 原文地址:https://www.cnblogs.com/Googler/p/4241258.html
Copyright © 2011-2022 走看看