using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for Zlhome.com pages. Depending on the depth of the landed page it
    /// queues community detail pages, saves a community ("houses") record, or saves
    /// the individual deal rows listed for a community.
    /// </summary>
    internal class Zlhome : AnalyzerBase
    {
        /// <summary>
        /// Number of <c>td</c> cells a deal row must have; the row parser reads
        /// indices 0 through 8.
        /// </summary>
        private const int DealColumnCount = 9;

        /// <summary>
        /// Processes one landed page according to <c>current.Depth</c>.
        /// </summary>
        /// <param name="current">
        /// The landed page. Its <c>Url</c> is used to resolve links and as the hash-key
        /// source; at <c>DataDepth.Deal</c> its <c>State</c> must hold the owning
        /// community's <see cref="Guid"/>.
        /// </param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            // Created up front for every depth, exactly as before, even when no case matches.
            var pHandler = CreateContentHandler(current);

            switch (current.Depth)
            {
                case 0:
                    {
                        // Listing page: hand the pager link to DoPerPaging, then queue
                        // every community link found in the list.
                        var dom = lander.GetDocument(pHandler);
                        DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child");
                        foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, DataDepth.Houses);
                        }
                    }
                    break;

                case DataDepth.Houses:
                    {
                        // Community detail page: collect "label:value" attributes and
                        // map them onto the houses entity.
                        var dom = lander.GetDocument(pHandler);
                        var attrs = new AttributeFiller();
                        // The first ".sc a" text carries the community name plus a
                        // "关注" suffix that is stripped here.
                        attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty));
                        attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li"));

                        // The record key is derived from the page URL.
                        Guid hashKey = GenHashKey(current.Url.OriginalString);
                        var bo = Crawler.Repository.LoadHouses(hashKey);
                        bo.SiteID = "Zlhome.com";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.CityName = Crawler.Config.CityName;

                        // Label pairs handed to FillEntity alongside the entity.
                        // NOTE(review): "骏工日期" may be a typo of "竣工日期" - confirm
                        // against the labels the site actually renders before changing it.
                        attrs.FillEntity(bo, new Dictionary<string, string>()
                        {
                            {"地址", "小区地址"},
                            {"所属片区", "所属区域"},
                            {"物业类型", "物业类别"},
                            {"骏工日期", "竣工时间"},
                        });
                        MapMark(bo);
                        Repository.Save(bo);
                        Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                        // The deal-history link is the first anchor inside the second
                        // ".xqinfo" element; the saved row id travels with the queued URL.
                        var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip(1).First();
                        var dealNode = QueryNode(pNode, "a");
                        var url = GetHref(dealNode, current.Url);
                        Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
                    }
                    break;

                case DataDepth.Deal:
                    {
                        Guid housesID = (Guid)current.State;
                        var dom = lander.GetDocument(pHandler);

                        // Rows of the first ".cjxxtable" are saved as sales; rows of
                        // every later table are saved as rentals.
                        bool isRent = false;
                        foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
                        {
                            foreach (var node in QueryNodes(table, "tr"))
                            {
                                var spans = QueryTexts(node, "td").ToArray();

                                // Rows without the full set of cells (e.g. header or
                                // placeholder rows) cannot be mapped; skip them instead
                                // of failing the whole page with an index error.
                                if (spans.Length < DealColumnCount)
                                {
                                    continue;
                                }

                                SaveDealRow(housesID, spans, isRent);
                            }
                            isRent = true;
                        }
                    }
                    break;
            }
        }

        /// <summary>
        /// Saves one deal row as a house listing and writes a log line for it.
        /// </summary>
        /// <param name="housesID">Id of the community the row belongs to.</param>
        /// <param name="spans">Cell texts of the row; must contain at least nine entries.</param>
        /// <param name="isRent"><c>true</c> for a rental row, <c>false</c> for a sale row.</param>
        private void SaveDealRow(Guid housesID, string[] spans, bool isRent)
        {
            // The first cell is stored as the transaction date only when it parses as one.
            DateTime? transactionDate = null;
            DateTime dump;
            if (DateTime.TryParse(spans[0], out dump))
            {
                transactionDate = dump;
            }

            // Cell index 2 is not mapped to any field.
            Repository.SaveHouselisting(new HouselistingEntity()
            {
                HousesID = housesID,
                TransactionDate = transactionDate,
                Area = spans[1],
                Apartment = spans[3],
                Orientation = spans[4],
                Floor = spans[5],
                UnitPriceOrLease = spans[6],
                SoldPriceOrRent = spans[7],
                ServiceBroker = spans[8],
                IsRent = isRent
            });
            Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
        }
    }
}