zoukankan      html  css  js  c++  java
  • zlhome.com Deal

    using AnfleCrawler.Common;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer for zlhome.com pages. The crawl depth of the landed page selects the work:
        /// depth 0 pages are listings whose community links are queued at
        /// <c>DataDepth.Houses</c>; <c>DataDepth.Houses</c> pages are parsed into a houses
        /// entity and their deal link is queued at <c>DataDepth.Deal</c>; <c>DataDepth.Deal</c>
        /// pages are parsed row by row into house-listing (deal) records.
        /// </summary>
        internal class Zlhome : AnalyzerBase
        {
            /// <summary>
            /// Minimum number of <c>td</c> cells a deal-table row must contain.
            /// The Deal branch reads cell indexes 0 through 8.
            /// </summary>
            private const int DealCellCount = 9;

            /// <summary>
            /// Processes one landed page according to <c>current.Depth</c>.
            /// Depths other than 0, <c>DataDepth.Houses</c> and <c>DataDepth.Deal</c> are ignored.
            /// </summary>
            /// <param name="current">
            /// The landed page. Its <c>Url</c> is used to resolve relative links, and for
            /// <c>DataDepth.Deal</c> pages its <c>State</c> must hold the owning houses row id
            /// (a <see cref="Guid"/>) that was pushed together with the deal URL.
            /// </param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 0:
                        {
                            var dom = lander.GetDocument(pHandler);
                            // NOTE(review): presumably queues the remaining listing pages found via
                            // the pager selector - confirm against AnalyzerBase.DoPerPaging.
                            DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child");

                            // Queue every community detail link found on the listing page.
                            foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
                            {
                                var url = GetHref(node, current.Url);
                                Crawler.PushUrl(url, DataDepth.Houses);
                            }
                        }
                        break;
                    case DataDepth.Houses:
                        {
                            var dom = lander.GetDocument(pHandler);
                            var attrs = new AttributeFiller();

                            // The community name comes from the first ".sc a" text, with the
                            // "关注" label text stripped out of it.
                            attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty));

                            // Remaining attributes come from the list items of the detail block.
                            attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li"));

                            // The entity is keyed by a hash of the page URL.
                            Guid hashKey = GenHashKey(current.Url.OriginalString);
                            var bo = Crawler.Repository.LoadHouses(hashKey);
                            bo.SiteID = "Zlhome.com";
                            bo.PageUrl = current.Url.OriginalString;
                            bo.CityName = Crawler.Config.CityName;
                            // NOTE(review): the dictionary pairs two sets of attribute labels for
                            // FillEntity; which side is the site label and which the entity field
                            // is decided by AttributeFiller - confirm there before editing keys.
                            attrs.FillEntity(bo, new Dictionary<string, string>()
                            {
                                {"地址", "小区地址"},
                                {"所属片区", "所属区域"},
                                {"物业类型", "物业类别"},
                                {"骏工日期", "竣工时间"},
                            });
                            MapMark(bo);
                            Repository.Save(bo);
                            Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                            // The deal-records link is the anchor inside the second ".xqinfo" block;
                            // the saved row id travels with the URL as crawl state.
                            var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip(1).First();
                            var dealNode = QueryNode(pNode, "a");
                            var url = GetHref(dealNode, current.Url);
                            Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
                        }
                        break;
                    case DataDepth.Deal:
                        {
                            Guid housesID = (Guid)current.State;
                            var dom = lander.GetDocument(pHandler);

                            // The first ".cjxxtable" is recorded as sales; every later table on
                            // the page is recorded as rentals.
                            bool isRent = false;
                            foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
                            {
                                foreach (var node in QueryNodes(table, "tr"))
                                {
                                    var cells = QueryTexts(node, "td").ToArray();
                                    if (cells.Length < DealCellCount)
                                    {
                                        // Header rows built from th cells, spacer rows and other
                                        // short rows carry no deal record; indexing them would
                                        // throw and abort the rest of the page.
                                        continue;
                                    }

                                    // An unparseable date is stored as null rather than rejected.
                                    DateTime? transactionDate = null;
                                    DateTime dump;
                                    if (DateTime.TryParse(cells[0], out dump))
                                    {
                                        transactionDate = dump;
                                    }
                                    // cells[2] is not stored.
                                    Repository.SaveHouselisting(new HouselistingEntity()
                                    {
                                        HousesID = housesID,
                                        TransactionDate = transactionDate,
                                        Area = cells[1],
                                        Apartment = cells[3],
                                        Orientation = cells[4],
                                        Floor = cells[5],
                                        UnitPriceOrLease = cells[6],
                                        SoldPriceOrRent = cells[7],
                                        ServiceBroker = cells[8],
                                        IsRent = isRent
                                    });
                                    Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
                                }
                                isRent = true;
                            }
                        }
                        break;
                }
            }
        }
    }
  • 相关阅读:
    Newtonsoft.Json序列化 对时间格式化处理
    CSS常用提示浮出层的写法
    避免常见的6种HTML5错误用法
    varchar(Max) 对应SqlParameter 数据类型长度改为1
    IIS指定域名不能调试解决办法
    如何弹出固定大小及内容的网页窗口
    CSS图片圆角框的灵活处理
    精通CSS:高级Web标准解决方案(中文电子书下载)
    AjaxControlToolKit(整理)三.......(35个控件)简单介绍
    CSS通用开发库
  • 原文地址:https://www.cnblogs.com/Googler/p/4272669.html
Copyright © 2011-2022 走看看