zoukankan      html  css  js  c++  java
  • 获取大众点评数据

    使用.NET实现一个从大众点评抓取一些基础数据(商家名称,地址,电话,经纬度)的小程序。

    实现逻辑:

    1、以异步方式从列表上获取前三项(商家名称、地址、电话);

    2、把获取下来的数据保存到数据库表里;

    3、把存在数据表里的地址信息读取出来,通过调用QQ地图API把地址转化成经纬度;

    4、按行更新GIS信息。

    代码如下:

    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Data.SqlClient;
    using System.Diagnostics;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Xml;
    
    namespace DianPing_MeiFa
    {
        public partial class test : System.Web.UI.Page
        {
            /// <summary>
            /// Page entry point: runs the coordinate-lookup pass over the stored rows.
            /// </summary>
            /// <param name="sender">Event source supplied by the page lifecycle (unused).</param>
            /// <param name="e">Event arguments (unused).</param>
            protected void Page_Load(object sender, EventArgs e)
            {
                // Scraping pass, currently disabled. Re-enable it to populate the table
                // from the paged listing before running the coordinate lookup.
                //this.lblNames.Text = "begin...";
                //for (int i = 1; i < 51; i++) // iterate over the listing pages
                //{
                //    string url = "http://www.dianping.com/search/category/2/50/p";
                //    url += i;
                //    this.SaveMeiFaData(url);
                //}

                setMapLocation();
            }
    
            /// <summary>
            /// Loads every row of the <c>t_meifa</c> table.
            /// </summary>
            /// <returns>The first table of the data set produced by the query.</returns>
            private DataTable GetMeifa()
            {
                const string query = "SELECT * FROM t_meifa";
                DataSet resultSet = SqlHelper.ExecuteDataset(SqlHelper.GetConnection(), CommandType.Text, query);
                return resultSet.Tables[0];
            }
    
    
            /// <summary>
            /// Looks up coordinates for the address of every stored row and writes
            /// them back to the database, one row at a time.
            /// </summary>
            /// <remarks>
            /// Rows without an address, and rows for which the geocoder response carries
            /// no usable location, are skipped (with a debug trace) instead of aborting
            /// the remaining rows with a <see cref="NullReferenceException"/>.
            /// </remarks>
            private void setMapLocation()
            {
                DataTable dt = this.GetMeifa();
                IList<MeiFa> mfList = new List<MeiFa>();

                // Convert the DataTable rows into objects.
                foreach (DataRow dr in dt.Rows)
                {
                    MeiFa mf = new MeiFa
                    {
                        Id = int.Parse(dr["id"].ToString()),
                        Name = dr["name"].ToString(),
                        Address = dr["address"].ToString()
                    };
                    mfList.Add(mf);
                }

                // Resolve and store the latitude/longitude of each row.
                foreach (MeiFa mf in mfList)
                {
                    if (string.IsNullOrEmpty(mf.Address))
                    {
                        // Nothing to geocode for this row.
                        continue;
                    }

                    QQMapGeocoder qmg = this.GetGeocoder(mf.Address);
                    if (qmg == null || qmg.result == null || qmg.result.location == null)
                    {
                        // The response had no location payload; keep going with the next row.
                        Debug.WriteLine("No location returned for id " + mf.Id
                            + (qmg == null ? string.Empty : ": " + qmg.message));
                        continue;
                    }

                    QQMapLocation location = qmg.result.location;
                    if (location.lat == null || location.lng == null)
                    {
                        Debug.WriteLine("Incomplete location returned for id " + mf.Id);
                        continue;
                    }

                    mf.lat = location.lat;
                    mf.lng = location.lng;

                    this.UpdateMeiFaLocation(mf);
                }
            }
    
            /// <summary>
            /// Stores the longitude and latitude of one record, matched by its id.
            /// </summary>
            /// <param name="mf">Record whose <c>Id</c>, <c>lng</c> and <c>lat</c> are written.</param>
            private void UpdateMeiFaLocation(MeiFa mf)
            {
                // NOTE(review): rows are read from and inserted into t_meifa elsewhere in this
                // file, but this statement updates t_meifa_bak — confirm that is intended.
                string sql = "UPDATE t_meifa_bak SET lng=@lng,lat=@lat WHERE id=@id";

                // A null parameter value is not sent to the server at all, which makes the
                // command fail; DBNull.Value is what represents SQL NULL.
                SqlParameter[] sps =
                {
                    new SqlParameter("@lng", (object)mf.lng ?? DBNull.Value),
                    new SqlParameter("@lat", (object)mf.lat ?? DBNull.Value),
                    new SqlParameter("@id", mf.Id),
                };

                SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
            }
    
            /// <summary>
            /// Queries the map geocoder web service for the coordinates of an address.
            /// </summary>
            /// <param name="address">Address to look up; it is URL-encoded before being sent.</param>
            /// <returns>The deserialized service response.</returns>
            private QQMapGeocoder GetGeocoder(string address)
            {
                // NOTE(review): the API key is embedded in source; consider moving it to configuration.
                string apiMapUrl = "http://apis.map.qq.com/ws/geocoder/v1/?region=北京&address={0}&key=Y5QBZ-DEDR4-3W3U7-XL37W-VVMT6-3KB6K";

                // Escape the address so characters such as '&', '#' or spaces cannot
                // truncate or corrupt the query string.
                apiMapUrl = string.Format(apiMapUrl, Uri.EscapeDataString(address ?? string.Empty));

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(apiMapUrl);
                request.Method = "GET";

                // Dispose the response, stream and reader so the connection is released.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    string json = reader.ReadToEnd();
                    return QQMapGeocoder.DeserializeGeocoder(json);
                }
            }
    
            /// <summary>
            /// Downloads one listing page, extracts its records and stores them in the database.
            /// </summary>
            /// <param name="url">Address of the listing page to download.</param>
            private void SaveMeiFaData(string url)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Method = "GET";
                request.KeepAlive = true;
                request.ContentType = "application/x-www-form-urlencoded";

                request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8";

                string outString;

                // Dispose the response, stream and reader so the connection is released
                // before the (slower) parsing and database work starts.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    outString = reader.ReadToEnd();
                }

                IList<MeiFa> mfList = this.getMeiFaList(outString);
                this.InsertDb(mfList);
            }
    
    
            /// <summary>
            /// Concatenates all strings of a list, in order, without a separator.
            /// </summary>
            /// <param name="list">Strings to join; null entries contribute nothing.</param>
            /// <returns>The concatenated text, or an empty string when the list is null or empty.</returns>
            private string GetContent(IList<string> list)
            {
                if (list == null)
                {
                    return string.Empty;
                }

                // string.Concat builds the result in one pass instead of allocating a
                // new intermediate string on every iteration.
                return string.Concat(list);
            }
    
            /// <summary>
            /// Extracts every <c>&lt;ul class="detail"&gt;</c> fragment from a listing page
            /// and converts each one into a record.
            /// </summary>
            /// <param name="html">Markup of the listing page.</param>
            /// <returns>
            /// One record per matched fragment, in document order; an empty list when
            /// <paramref name="html"/> is null or empty or nothing matches.
            /// </returns>
            public IList<MeiFa> getMeiFaList(string html)
            {
                IList<MeiFa> mfList = new List<MeiFa>();
                if (string.IsNullOrEmpty(html))
                {
                    return mfList;
                }

                // [\s\S] matches any character including line breaks, so a fragment that
                // spans several lines is captured up to its first closing </ul>.
                string reg = @"<ul[^>]*class=""detail""[^>]*>[\s\S]*?</ul>";
                MatchCollection mc = Regex.Matches(html, reg);
                foreach (Match m in mc)
                {
                    // &nbsp; is replaced by a plain space before the fragment is parsed.
                    string strDom = m.Value.Replace("&nbsp;", " ");
                    mfList.Add(MeiFa.CreateMeifa(strDom));
                }
                return mfList;
            }
    
            /// <summary>
            /// Removes every <c>&lt;...&gt;</c> tag from a string, keeping the text between tags.
            /// </summary>
            /// <param name="HTMLStr">Text that may contain markup tags.</param>
            /// <returns>The input with all tag spans removed.</returns>
            public string ReplaceHtml(string HTMLStr)
            {
                Regex tagPattern = new Regex("<[^>]*>");
                string stripped = tagPattern.Replace(HTMLStr, string.Empty);
                return stripped;
            }
    
            /// <summary>
            /// Inserts a batch of records on a best-effort basis.
            /// </summary>
            /// <remarks>
            /// Records without a name are skipped. A failure on one record is written to
            /// the debug output and does not stop the remaining records from being inserted.
            /// </remarks>
            /// <param name="mfs">Records to insert; a null list is treated as empty.</param>
            public void InsertDb(IList<MeiFa> mfs)
            {
                if (mfs == null)
                {
                    return;
                }

                foreach (var mf in mfs)
                {
                    if (mf == null || string.IsNullOrEmpty(mf.Name))
                    {
                        continue;
                    }

                    try
                    {
                        this.InsertDb(mf);
                    }
                    catch (Exception ex)
                    {
                        // Still best-effort, but leave a trace instead of hiding the failure.
                        Debug.WriteLine("Insert failed for '" + mf.Name + "': " + ex.Message);
                    }
                }
            }
    
            /// <summary>
            /// Inserts one record (name, address, telephone) into the <c>t_meifa</c> table.
            /// </summary>
            /// <param name="mf">Record to insert.</param>
            public void InsertDb(MeiFa mf)
            {
                string sql = "INSERT INTO t_meifa(name,address,tel) values(@name,@address,@tel)";

                // A null parameter value is not sent to the server at all, which makes the
                // command fail; DBNull.Value is what represents SQL NULL.
                SqlParameter[] sps =
                {
                    new SqlParameter("@name", (object)mf.Name ?? DBNull.Value),
                    new SqlParameter("@address", (object)mf.Address ?? DBNull.Value),
                    new SqlParameter("@tel", (object)mf.Tel ?? DBNull.Value),
                };

                SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
            }
    
    
            /// <summary>
            /// One scraped record: name, address and telephone, plus optional coordinates.
            /// </summary>
            public class MeiFa
            {
                /// <summary>Row identifier.</summary>
                public int Id { get; set; }

                /// <summary>Name taken from the listing fragment.</summary>
                public string Name { get; set; }

                /// <summary>First whitespace-separated token of the address text.</summary>
                public string Address { get; set; }

                /// <summary>Second whitespace-separated token of the address text.</summary>
                public string Tel { get; set; }

                /// <summary>Longitude, or null when not set.</summary>
                public float? lng { get; set; }

                /// <summary>Latitude, or null when not set.</summary>
                public float? lat { get; set; }

                public MeiFa()
                {
                }

                /// <summary>
                /// Builds a record from one listing fragment.
                /// </summary>
                /// <param name="domStr">Markup of a single fragment; it must be well-formed XML.</param>
                /// <returns>
                /// The populated record, or an empty record (all fields unset) when the
                /// fragment is empty, is not well-formed, lacks the expected nodes, or its
                /// address text does not contain both an address and a telephone token.
                /// </returns>
                public static MeiFa CreateMeifa(string domStr)
                {
                    if (string.IsNullOrEmpty(domStr))
                    {
                        return new MeiFa();
                    }

                    try
                    {
                        Debug.WriteLine(domStr);

                        XmlDocument dom = new XmlDocument();
                        dom.LoadXml(domStr);

                        // XPath accepts single-quoted literals, which avoids escaping inside the C# string.
                        XmlNode nameNode = dom.SelectSingleNode("//ul/li[@class='shopname']/a[@data-hippo-type='shop']");
                        XmlNode addressNode = dom.SelectSingleNode("//ul/*/li[@class='address']");
                        if (nameNode == null || addressNode == null)
                        {
                            return new MeiFa();
                        }

                        string[] ats = getArr(ReplaceAddress(addressNode.InnerText));
                        if (ats.Length < 2)
                        {
                            // Both the address and the telephone token are required.
                            return new MeiFa();
                        }

                        MeiFa m = new MeiFa();
                        m.Name = nameNode.InnerText;
                        m.Address = ats[0];
                        m.Tel = ats[1];
                        return m;
                    }
                    catch (XmlException ex)
                    {
                        // Scraped markup is frequently not well-formed XML; report it and
                        // fall back to an empty record so the caller can skip it.
                        Debug.WriteLine("Fragment is not well-formed XML: " + ex.Message);
                        return new MeiFa();
                    }
                }

                /// <summary>
                /// Splits a string on spaces and drops the empty pieces.
                /// </summary>
                /// <param name="str">Text to split.</param>
                /// <returns>The non-empty tokens, in order.</returns>
                private static string[] getArr(string str)
                {
                    return str.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                }

                /// <summary>
                /// Removes the address label and all line breaks from the address text.
                /// </summary>
                /// <param name="s">Raw inner text of the address node.</param>
                /// <returns>The cleaned text.</returns>
                private static string ReplaceAddress(string s)
                {
                    s = s.Replace("地址:", "");
                    // Strip both CR and LF so the text ends up on a single line regardless
                    // of the line-ending style used by the page.
                    s = s.Replace("\r", "").Replace("\n", "");
                    return s;
                }
            }
        }
    }
    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    
    namespace DianPing_MeiFa
    {
        //[JsonProperty]
        /// <summary>
        /// Top-level object of a geocoder response: status, message and result payload.
        /// </summary>
        public class QQMapGeocoder
        {
            /// <summary>Value of the response's <c>status</c> field.</summary>
            public int status { get; set; }

            /// <summary>Value of the response's <c>message</c> field.</summary>
            public string message { get; set; }

            /// <summary>Value of the response's <c>result</c> field.</summary>
            public QQMapResult result { get; set; }

            /// <summary>
            /// Parses a JSON document into a <see cref="QQMapGeocoder"/>.
            /// </summary>
            /// <param name="jsonStr">JSON text of the response.</param>
            /// <returns>The deserialized object.</returns>
            public static QQMapGeocoder DeserializeGeocoder(string jsonStr)
            {
                return JsonConvert.DeserializeObject<QQMapGeocoder>(jsonStr, new JsonSerializerSettings());
            }
        }
    
        /// <summary>
        /// Type of the <c>result</c> property of <see cref="QQMapGeocoder"/>.
        /// Property names are lower-case, presumably to match the JSON field names — confirm against the service response.
        /// </summary>
        public class QQMapResult
        {
            /// <summary>Coordinates of the resolved address.</summary>
            public QQMapLocation location { get; set; }
            /// <summary>Structured parts of the resolved address.</summary>
            public QQMapAddressComponents address_components { get; set; }
            /// <summary>Value of the <c>similarity</c> field, kept as text.</summary>
            public string similarity { get; set; }
        }
    
        /// <summary>
        /// Longitude/latitude pair; either value is null when not set.
        /// NOTE(review): <c>float</c> holds roughly seven significant digits — confirm that is enough precision for coordinates here.
        /// </summary>
        public class QQMapLocation
        {
            /// <summary>Longitude.</summary>
            public float? lng { get; set; }
            /// <summary>Latitude.</summary>
            public float? lat { get; set; }
        }
    
        /// <summary>
        /// Structured address parts, used as the type of <c>address_components</c> in <see cref="QQMapResult"/>.
        /// </summary>
        public class QQMapAddressComponents
        {
            /// <summary>Province part of the address.</summary>
            public string province { get; set; }
            /// <summary>City part of the address.</summary>
            public string city { get; set; }
            /// <summary>District part of the address.</summary>
            public string district { get; set; }
            /// <summary>Street part of the address.</summary>
            public string street { get; set; }
            /// <summary>Street number part of the address.</summary>
            public string street_number { get; set; }
    
        }
        
    }

    注:仅供学习使用!

  • 相关阅读:
    常见问题汇总
    python的正则表达式
    Python 程序读取外部文件、网页的编码与JSON格式的转化
    第三方仓库PyPI
    文件名称一定不要设置为某些模块名称,会引起冲突!
    迟来的博客
    FENLIQI
    fenye
    Notif
    phpv6_css
  • 原文地址:https://www.cnblogs.com/wfcfan/p/3746141.html
Copyright © 2011-2022 走看看