zoukankan      html  css  js  c++  java
  • Mytophome Deal

    using AnfleCrawler.Common;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer for Mytophome deal list pages: walks the deal rows of a list page,
        /// makes sure the estate ("houses") each row refers to is stored, and saves one
        /// listing record per row.
        /// </summary>
        internal class Mytophome : AnalyzerBase
        {
            /// <summary>
            /// Smallest number of &lt;span&gt; columns a deal row must have; the row parser
            /// reads indexes 1 through 4 at minimum.
            /// </summary>
            private const int MinListingColumns = 5;

            /// <summary>
            /// Processes a landed page. Only depth 0 (the deal list page) is handled:
            /// paging is scheduled and every row under ".deD_ctt li" is saved as a listing.
            /// </summary>
            /// <param name="current">The page being analyzed; its Url is used to resolve relative links.</param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 0:
                        {
                            var dom = lander.GetDocument(pHandler);

                            // Give the parent of the <nobr> element a known id so that the
                            // paging helper can be pointed at it with an "#id" selector.
                            var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
                            nextNode.SetAttributeValue("id", PagingHack);
                            DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack));

                            foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
                            {
                                var Nset = QueryNodes(node, "span").ToArray();
                                if (Nset.Length < MinListingColumns)
                                {
                                    // Rows with too few columns cannot be parsed; skip them
                                    // instead of letting an index error abort the whole page.
                                    Crawler.OutWrite("跳过列数不足的成交记录 列数={0}", Nset.Length);
                                    continue;
                                }

                                // Keep the original link: it is needed for diagnostics after
                                // the estate detail URL has been derived from it.
                                var orgUrl = GetHref(QueryNode(Nset[1], "a"), current.Url);
                                var query = System.Web.HttpUtility.ParseQueryString(orgUrl.Query);
                                string shid = query["estateId"];
                                if (string.IsNullOrEmpty(shid))
                                {
                                    // Without an estate id the detail URL would be malformed.
                                    Crawler.OutWrite("跳过缺少 estateId 的成交记录 {0}", orgUrl);
                                    continue;
                                }

                                var housesUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", orgUrl.Authority, shid));
                                Guid housesID;
                                try
                                {
                                    CheckHouses(housesUrl, out housesID);
                                }
                                catch (HtmlNodeMissingException ex)
                                {
                                    App.LogError(ex, "OrgUrl={0} HousesUrl={1}", orgUrl, housesUrl);
                                    continue;
                                }

                                var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
                                SaveListing(housesID, vals);
                                Crawler.OutWrite("保存小区出售记录 {0}", housesID);
                            }
                        }
                        break;
                }
            }

            /// <summary>
            /// Builds a listing record from the trimmed column texts of one deal row and saves it.
            /// </summary>
            /// <param name="housesID">Key of the estate the listing belongs to.</param>
            /// <param name="vals">
            /// Column texts of the row (at least <see cref="MinListingColumns"/> entries). A row with
            /// exactly six columns carries a building name at index 2, which shifts area, price and
            /// unit price one position to the right.
            /// </param>
            private void SaveListing(Guid housesID, string[] vals)
            {
                // The last column is tried as the transaction date; it stays null when unparsable.
                // NOTE(review): the last column is also read as the unit price below - confirm
                // against the live page which column really holds the date.
                DateTime? transactionDate = null;
                DateTime parsed;
                if (DateTime.TryParse(vals[vals.Length - 1], out parsed))
                {
                    transactionDate = parsed;
                }

                bool hasBuildingName = vals.Length == 6;
                int areaIndex = hasBuildingName ? 3 : 2;

                var listing = new HouselistingEntity()
                {
                    HousesID = housesID,
                    TransactionDate = transactionDate,
                    Area = string.Format("{0}平方", vals[areaIndex]),
                    SoldPriceOrRent = string.Format("{0}万", vals[areaIndex + 1]),
                    UnitPriceOrLease = string.Format("{0}元/平方", vals[areaIndex + 2]),
                };
                if (hasBuildingName)
                {
                    listing.BuildingName = vals[2];
                }
                Repository.SaveHouselisting(listing);
            }

            /// <summary>
            /// Ensures the estate behind <paramref name="housesUrl"/> is stored. The key is derived
            /// from the URL; when the stored record already has a SiteID nothing else is done,
            /// otherwise the detail page is fetched, its attributes are mapped onto the record and
            /// the record is saved.
            /// </summary>
            /// <param name="housesUrl">URL of the estate detail page.</param>
            /// <param name="housesID">Receives the key generated from the URL.</param>
            private void CheckHouses(Uri housesUrl, out Guid housesID)
            {
                housesID = GenHashKey(housesUrl.OriginalString);
                var bo = Crawler.Repository.LoadHouses(housesID);
                if (!string.IsNullOrEmpty(bo.SiteID))
                {
                    // Already stored: return before fetching so known estates cost no page download.
                    return;
                }

                var pHandler = CreateContentHandler(new PageLandEntity()
                {
                    Url = housesUrl,
                    Depth = DataDepth.Houses
                });
                pHandler.AjaxBlocks.Add(HACK);
                var dom = Crawler.Lander.GetDocument(pHandler);

                var attrs = new AttributeFiller();
                attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li"));

                bo.SiteID = "Mytophome.com";
                bo.PageUrl = housesUrl.OriginalString;
                bo.CityName = Crawler.Config.CityName;
                // Pairs of page label and entity field name handed to the attribute filler.
                attrs.FillEntity(bo, new Dictionary<string, string>()
                {
                    {"楼盘名称", "小区名称"},
                    {"楼盘地址", "小区地址"},
                    {"发展商", "开发商"},
                    {"物管公司", "物业公司"},
                    {"物管电话", "物业办公电话"},
                });
                MapMark(bo);
                Crawler.Repository.Save(bo);
                Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
            }
        }
    }
  • 相关阅读:
    转:imageNamed和dataWithContentsOfFile的区别
    [内存管理实践 之 1]在返回按钮中,释放内存
    转:当程序崩溃的时候怎么办 Part2
    iOS 内存管理,我们需要一套切实可行的实践指导书,而不是理论指导书
    转 iOS程序中调用系统自带应用(短信,邮件,浏览器,地图,appstore,拨打电话)
    iOS笔记:内存管理
    转:【图文教程】创建Xcode自定义模板
    判断两个数的大小,返回其中的大者/小者
    iOS全局变量与属性的内存管理
    UIImage 详解
  • 原文地址:https://www.cnblogs.com/Googler/p/4272703.html
Copyright © 2011-2022 走看看