zoukankan      html  css  js  c++  java
  • realestate.cei.gov.cn

    using AnfleCrawler.Common;
    using System;
    using System.Collections.Concurrent;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace AnfleCrawler.DataAnalyzer
    {
        /// <summary>
        /// Analyzer that walks the "br2.aspx" pages of realestate.cei.gov.cn: it seeds one URL per
        /// entry of the page's "#qrq" select box, extracts the table rows found under "#str1" on
        /// each page, and dumps the collected rows to a text file when the lander raises Idle.
        /// </summary>
        public class ManualAnalyzer : AnalyzerBase
        {
            // URL template of the crawled page; {0} is the value of the "rq" query parameter.
            private const string PageUrlFormat = "http://www.realestate.cei.gov.cn/traden/br2.aspx?rq={0}&lx=w6&r1=20140830";

            // "rq" value of the page that is fetched first to discover the other "rq" values.
            private const string SeedRq = "20140601";

            // Opening-tag prefix used to locate table rows in the raw markup.
            private const string RowOpen = "<tr";

            // Rows collected so far. Each entry is one output line: the page date followed by the cell texts.
            private readonly ConcurrentQueue<string[]> _rows = new ConcurrentQueue<string[]>();

            /// <summary>
            /// Hooks the lander's Idle event, runs the base initialization, then fetches the seed page
            /// and pushes one depth-1 URL for every non-empty option value of the "#qrq" select box.
            /// </summary>
            /// <param name="crawler">The crawler this analyzer is attached to.</param>
            public override void Init(PageCrawler crawler)
            {
                crawler.Lander.Idle += Lander_Idle;
                base.Init(crawler);

                var url = new Uri(string.Format(PageUrlFormat, SeedRq));
                var dom = Crawler.Lander.GetDocument(new PageContentHandler() { Url = url });
                foreach (var node in QueryNodes(dom.DocumentNode, "#qrq option"))
                {
                    string val = node.GetAttributeValue("value", string.Empty);
                    if (string.IsNullOrWhiteSpace(val))
                    {
                        // An option without a value would yield "rq=" which AnalyzeInternal cannot parse as a date.
                        continue;
                    }
                    Crawler.PushUrl(new Uri(string.Format(PageUrlFormat, val)), 1);
                }
            }

            /// <summary>
            /// Idle handler: writes every collected row to the output file as one comma-separated line,
            /// overwriting any previous file content.
            /// </summary>
            void Lander_Idle(object sender, EventArgs e)
            {
                Crawler.OutWrite("Start step2...");
                App.LogInfo("Start step2...");
                // NOTE(review): "D:outdict.txt" is a drive-relative path (resolved against the current
                // directory of drive D:). It may have been meant as an absolute path whose backslashes
                // were lost; left unchanged here - confirm the intended location.
                using (var writer = new System.IO.StreamWriter(@"D:outdict.txt", false, Encoding.UTF8))
                {
                    foreach (var row in _rows)
                    {
                        writer.WriteLine(string.Join(",", row));
                    }
                }
            }

            /// <summary>
            /// Processes a landed page. Only depth-1 pages are handled: the page date is read from the
            /// "rq" query parameter (format yyyyMMdd), the rows under "#str1" are extracted, and one
            /// entry per row is queued, followed by a separator entry for the page.
            /// </summary>
            /// <param name="current">The landed page to analyze.</param>
            protected override void AnalyzeInternal(PageLandEntity current)
            {
                Crawler.OutWrite("*Start step1...");
                var lander = Crawler.Lander;
                var pHandler = CreateContentHandler(current);
                switch (current.Depth)
                {
                    case 1:
                        {
                            var invariant = System.Globalization.CultureInfo.InvariantCulture;
                            var query = System.Web.HttpUtility.ParseQueryString(current.Url.Query);
                            DateTime dt;
                            // TryParseExact also covers a missing "rq" (null) without throwing.
                            if (!DateTime.TryParseExact(query["rq"], "yyyyMMdd", invariant,
                                System.Globalization.DateTimeStyles.None, out dt))
                            {
                                App.LogInfo("Skip page, invalid rq: {0}", current.Url);
                                return;
                            }
                            var dom = lander.GetDocument(pHandler);

                            var checkNode = QueryNode(dom.DocumentNode, "#str1");
                            if (checkNode == null || string.IsNullOrWhiteSpace(checkNode.InnerText))
                            {
                                return;
                            }
                            checkNode.InnerHtml = CloseRows(checkNode.InnerHtml);
                            App.LogInfo("WTF CN:{0}", checkNode.InnerHtml);

                            // Count while iterating so the row query is enumerated only once.
                            int rowCount = 0;
                            foreach (var node in QueryNodes(checkNode, "tr"))
                            {
                                var cells = new List<string>();
                                cells.Add(dt.ToString("yyyy-MM-dd", invariant));
                                cells.AddRange(QueryTexts(node, "td"));
                                _rows.Enqueue(cells.ToArray());
                                rowCount++;
                            }
                            // Separator entry between pages in the output file.
                            _rows.Enqueue(new string[] { Environment.NewLine });
                            Crawler.OutWrite("#Stop step1 {0} {1}", dt.ToShortDateString(), rowCount);
                        }
                        break;
                }
            }

            /// <summary>
            /// Inserts a closing row tag in front of every row opening except the first one, so that
            /// each row is terminated before the next one starts. Markup without any row opening is
            /// returned unchanged.
            /// </summary>
            /// <param name="html">Raw inner markup of the table container.</param>
            /// <returns>The patched markup.</returns>
            private static string CloseRows(string html)
            {
                int first = html.IndexOf(RowOpen, StringComparison.Ordinal);
                if (first < 0)
                {
                    return html;
                }
                // Keep everything up to and including the first opening untouched; only later
                // openings get a preceding "</tr>". This avoids blindly cutting five characters,
                // which breaks when the markup does not start exactly with "<tr".
                int afterFirst = first + RowOpen.Length;
                return html.Substring(0, afterFirst)
                    + html.Substring(afterFirst).Replace(RowOpen, "</tr>" + RowOpen);
            }
        }
    }
  • 相关阅读:
    2019-2020-2 20175227张雪莹《网络对抗技术》 Exp3 免杀原理与实践
    2019-2020-2 20175227张雪莹《网络对抗技术》 Exp2 后门原理与实践
    2019-2020-2 20175227张雪莹《网络对抗技术》 Exp1 PC平台逆向破解
    USCOSII
    office word excel等图标显示异常
    Mysql事务
    Mysql 主备配置
    查看mysql已有用户并删除
    在linux下面安装mysql 确认 配置文件路径 my.cnf
    yum提示Another app is currently holding the yum lock; waiting for it to exit...
  • 原文地址:https://www.cnblogs.com/Googler/p/4110974.html
Copyright © 2011-2022 走看看