zoukankan      html  css  js  c++  java
  • 某查查企业爬虫(模拟人工验证码)

    1、技术栈:.NET Core 3.1、C#、Selenium

    //爬取所有省份、城市、区县
    /// <summary>
    /// Builds one search URL per province / city / county combination and writes
    /// them, one per line, to the file "企查查.txt".
    /// </summary>
    /// <param name="configPath">Not used by this implementation.</param>
    /// <param name="directory">Not used by this implementation; the output file name is fixed.</param>
    /// <returns>A task that completes once the file has been written.</returns>
    /// <remarks>
    /// The code lists are fetched sequentially through <c>GetCodeAsync</c>
    /// (presumably it returns the region codes found under the given selector — confirm against its definition).
    /// All URL text is assembled in the shared <c>StringBuilder</c> member, which is cleared before each use.
    /// </remarks>
    public override async Task WriteAreaToFileAsync(string configPath, string directory)
    {
        const string SearchLinkTemplate = "https://www.qcc.com/search?key={keyword}#industrycode:K&";
        const string CityListTemplate = "https://www.qcc.com/search_getCityListHtml?province={0}";
        const string CountyListTemplate = "https://www.qcc.com/search_getCountyListHtml?city={0}";
        const string AreaSuffixTemplate = "province:{0}&city:{1}&county:{2}&";

        // Formats a list-endpoint URL for a single code using the shared builder.
        Uri BuildListUri(string template, string code)
        {
            StringBuilder.Clear();
            StringBuilder.AppendFormat(template, code);
            return new Uri(StringBuilder.ToString());
        }

        List<string> provinceCodes = await GetCodeAsync(
            new Uri(SearchLinkTemplate),
            ".sfilter-tag.clearfix.provinceChoose dd a");

        var areaLinks = new List<string>();
        foreach (var provinceCode in provinceCodes)
        {
            var cityCodes = await GetCodeAsync(BuildListUri(CityListTemplate, provinceCode), "dd a");
            foreach (var cityCode in cityCodes)
            {
                var countyCodes = await GetCodeAsync(BuildListUri(CountyListTemplate, cityCode), "dd a");
                foreach (var countyCode in countyCodes)
                {
                    // Start from the search link, append the area filter, then rewrite
                    // the result in place; the replacements apply to the whole builder.
                    StringBuilder.Clear();
                    StringBuilder.Append(SearchLinkTemplate);
                    StringBuilder.AppendFormat(AreaSuffixTemplate, provinceCode, cityCode, countyCode);
                    StringBuilder.Replace("search", "search_index");
                    StringBuilder.Replace("中介#", "中介&ajaxflag=1&");
                    StringBuilder.Replace(":industrycode", "=industrycode");
                    areaLinks.Add(StringBuilder.ToString());
                }
            }
        }

        await File.WriteAllLinesAsync("企查查.txt", areaLinks);
    }
    
    //分页爬取企业信息
            /// <summary>
            /// Downloads one search-result page, passes the third cell of every result
            /// row to <c>VertifyAsync</c>, and reports whether the page was full.
            /// </summary>
            /// <param name="cityUri">Address of the result page to download.</param>
            /// <returns>
            /// <c>true</c> when the page yielded at least <c>PageSize</c> rows;
            /// <c>false</c> when it yielded fewer, when the site reports no data, or when
            /// the response could not be recognised after a few re-fetches.
            /// </returns>
            private async Task<bool> GetAgentsAsync(Uri cityUri)
            {
                // Upper bound on re-fetches of a response that matches none of the known
                // markers; without it such a response would make the loop below spin forever.
                const int MaxUnrecognizedRetries = 3;
                int unrecognizedResponses = 0;

                LogHelper.Info(cityUri.ToString());
                var pageSource = await HttpClient.GetStringAsync(cityUri);

                // "查企业" is the marker this method uses to recognise a real result page.
                while (!pageSource.Contains("查企业"))
                {
                    if (pageSource.StartsWith("<script>window.location", StringComparison.Ordinal))
                    {
                        // Redirect script: the target address is the first single-quoted
                        // value. It is handed to VertifyCode before the page is re-fetched.
                        string[] redirectParts = pageSource.Split("'");
                        if (redirectParts.Length < 2)
                        {
                            LogHelper.Info($"Redirect script without a quoted address for {cityUri}");
                            return false;
                        }

                        VertifyCode(new Uri(redirectParts[1]));
                    }
                    else if (pageSource.Contains("小查还没找到数据"))
                    {
                        // The site reports that there is no data for this query.
                        return false;
                    }
                    else
                    {
                        unrecognizedResponses++;
                        if (unrecognizedResponses > MaxUnrecognizedRetries)
                        {
                            LogHelper.Info($"Unrecognized response for {cityUri}; giving up after {MaxUnrecognizedRetries} retries");
                            return false;
                        }

                        // Brief pause so the retry is not an immediate hammering of the server.
                        await Task.Delay(TimeSpan.FromSeconds(1));
                    }

                    pageSource = await HttpClient.GetStringAsync(cityUri);
                }

                // Count rows while processing them so the query is enumerated only once.
                int rowCount = 0;
                foreach (var item in JumonyParser.Parse(pageSource).Find(".m_srchList tbody tr td:nth-child(3)"))
                {
                    await VertifyAsync(item.InnerHtml());
                    rowCount++;
                }

                return rowCount >= PageSize;
            }
    

    2、结果截图

    3、需要开通vip账号

    4、过滑动验证码

  • 相关阅读:
    SQL SERVER 2005中同义词实例
    内聚性是模块之所以成为模块的原因--一个中心、单一职责
    软件开发的方法论
    系统集成与软件开发
    编程的本质是构建---建构你想要表达的世界
    编程思想与以人为本-编程的本质
    软件开发之道-软件开发背后的哲学
    swift 协议(结合扩展)的特点
    swift的特性:扩展、协议、泛型
    从数据流角度管窥 Moya 的实现(一):构建请求
  • 原文地址:https://www.cnblogs.com/Zdelta/p/14122308.html
Copyright © 2011-2022 走看看