zoukankan      html  css  js  c++  java
  • 分享一个天气历史数据的采集脚本

    最近一个项目中需要用到过往的天气数据, 我找到了天气后报这个网站 (www.tianqihoubao.com), 并在 Spider Studio (下文简称 SS) 中完成了相关采集, 和大家分享一下.

    首先分析这个网站提供了两种信息:

    1. 省市关系

    2. 天气记录

    对应地, 我们创建如下数据结构:

    /// <summary>
    /// One entry of the site's province-level index (name plus link).
    /// </summary>
    [Serializable]
    public class Province
    {
        /// <summary>Link text of the province entry, as read by GetProvinceList.</summary>
        public string ProvinceName;
        /// <summary>Absolute URL of the province page, resolved against the page it was scraped from.</summary>
        public string ProvinceUrl;
    }
    
    /// <summary>
    /// One city link scraped from a province page.
    /// </summary>
    [Serializable]
    public class City
    {
        /// <summary>The province whose page this city was scraped from.</summary>
        public Province Province;
        /// <summary>Link text of the city entry.</summary>
        public string CityName;
        /// <summary>Absolute URL of the city's history page, resolved against the province page.</summary>
        public string CityUrl;
    }
    
    /// <summary>
    /// One link from a city's history list; following it yields the weather rows of that record.
    /// </summary>
    [Serializable]
    public class WeatherDataSet
    {
        /// <summary>The city whose history page this link was scraped from.</summary>
        public City City;
        /// <summary>Link text of the history entry.</summary>
        public string Title;
        /// <summary>Absolute URL of the history record page, resolved against the city page.</summary>
        public string Url;
    }
    
    /// <summary>
    /// One table row of a history record page. All values are kept as the raw cell text.
    /// </summary>
    [Serializable]
    public class WeatherData
    {
        /// <summary>The history record page this row was scraped from.</summary>
        public WeatherDataSet DataSet;
        /// <summary>Text of the first cell (td index 0).</summary>
        public string Date;
        /// <summary>Text of the second cell (td index 1).</summary>
        public string TextWeather;
        /// <summary>Text of the third cell (td index 2).</summary>
        public string Temp;
        /// <summary>Text of the fourth cell (td index 3).</summary>
        public string Wind;
    }

     

    >> 网站一共提供34个省级行政区 (省/自治区/直辖市/特别行政区) 的天气信息, 完整的列表在: http://www.tianqihoubao.com/lishi/index.htm

    对应的采集语句是:

    // Select every province link: anchors inside the DT elements under #content.
    var list = Default.SelectNodes("#content DT a");

    >> 每个省都有下辖的城市列表, 如: http://www.tianqihoubao.com/lishi/hebei.htm

    对应的采集语句是:

    // Select every city link: anchors inside the DD elements under #content.
    var list = Default.SelectNodes("#content DD a");

    >> 每个城市都有一个历史天气记录列表, 如: http://www.tianqihoubao.com/lishi/shijiazhuang.html

    对应的采集语句是:

    // Select every history-record link: anchors inside div.pcity directly under #content.
    // The result is kept in a local, as in the other snippets, so it can be iterated.
    var list = Default.SelectNodes("#content>div.pcity a");

    >> 进入每条历史天气记录, 就可以得到当月的天气数据了:

    对应的采集语句是:

    // All rows of the record table except the first one (tr:gt(0) skips index 0).
    var rows = Default.SelectNodes("#content>table.b tr:gt(0)");
    foreach (var row in rows)
    {
        // Cells are read by position: 0 = date, 1 = weather text, 2 = temperature, 3 = wind.
        var date = row.SelectSingleNode("td:eq(0)").Text();
        var textWeather = row.SelectSingleNode("td:eq(1)").Text();
        var temp = row.SelectSingleNode("td:eq(2)").Text();
        var wind = row.SelectSingleNode("td:eq(3)").Text();
    }

    将这些语句分别包装为方法, 并将结果绑定到最开始定义的数据结构中:

    public List<Province> GetProvinceList() {...} // Get the municipalities / provinces / special regions
    public List<City> GetCityList(Province province) {...} // Get the list of cities
    public List<WeatherDataSet> GetWeatherDataSet(City city) {...} // Get the weather history record sets of the given city
    public List<WeatherData> GetWeatherData(WeatherDataSet ds) {...} // Get the weather history data

    >> 完整的脚本: (复制到SS中即可直接运行)

    SS下载地址为: http://www.gdtsearch.com/products.spiderstudio.docapi.htm

    /// <summary>
    /// Demo entry point: fetches the province list, then drills into the second
    /// province, that province's second city and that city's first history record,
    /// logging everything collected at each level.
    /// </summary>
    /// <remarks>
    /// When a level returns too few entries to drill further, a message is logged
    /// and the run stops, instead of failing with ArgumentOutOfRangeException on
    /// the list indexer.
    /// </remarks>
    public void Run()
    {
        Logger.ClearAll();
        Default.ScriptErrorsSuppressed = true;

        var pl = GetProvinceList();
        foreach (var p in pl)
        {
            Logger.Log(p.ProvinceName);
            Logger.Log(p.ProvinceUrl);
        }
        // The demo uses the second province (index 1), so at least two are required.
        if (pl.Count < 2)
        {
            Logger.Log("Fewer than two provinces found; stopping.");
            return;
        }

        var cl = GetCityList(pl[1]);
        foreach (var c in cl)
        {
            Logger.Log(c.Province.ProvinceName);
            Logger.Log(c.Province.ProvinceUrl);
            Logger.Log(c.CityName);
            Logger.Log(c.CityUrl);
        }
        // The demo uses the second city (index 1), so at least two are required.
        if (cl.Count < 2)
        {
            Logger.Log("Fewer than two cities found; stopping.");
            return;
        }

        var ds = GetWeatherDataSet(cl[1]);
        foreach (var d in ds)
        {
            Logger.Log(d.City.CityName);
            Logger.Log(d.Title);
            Logger.Log(d.Url);
        }
        // The demo uses the first history record (index 0), so one is required.
        if (ds.Count < 1)
        {
            Logger.Log("No weather history records found; stopping.");
            return;
        }

        var dl = GetWeatherData(ds[0]);
        foreach (var d in dl)
        {
            Logger.Log(d.DataSet.Title);
            Logger.Log(d.Date);
            Logger.Log(d.TextWeather);
            Logger.Log(d.Temp);
            Logger.Log(d.Wind);
        }
    }
    
    
    /// <summary>
    /// Loads the history index page and turns every province link on it into a Province.
    /// </summary>
    /// <returns>One Province per anchor matched by "#content DT a", in match order.</returns>
    public List<Province> GetProvinceList()
    {
        Default.Navigate("http://www.tianqihoubao.com/lishi/index.htm");
        // NOTE(review): Ready presumably blocks until the selector matches - confirm against the SS API.
        Default.Ready("#content DT");

        var provinces = new List<Province>();
        foreach (var link in Default.SelectNodes("#content DT a"))
        {
            var name = link.Text();
            var href = link.Attr("href");
            provinces.Add(new Province
            {
                ProvinceName = name,
                // Resolve the href against the current page URL to get an absolute address.
                ProvinceUrl = new Uri(Default.Url, href).ToString()
            });
        }
        return provinces;
    }
    
    /// <summary>
    /// Loads a province page and turns every city link on it into a City.
    /// </summary>
    /// <param name="province">Province whose ProvinceUrl is opened; stored on every returned City.</param>
    /// <returns>One City per anchor matched by "#content DD a", in match order.</returns>
    public List<City> GetCityList(Province province)
    {
        Default.Navigate(province.ProvinceUrl);
        // NOTE(review): Ready presumably blocks until the selector matches - confirm against the SS API.
        Default.Ready("#content DD");

        var cities = new List<City>();
        foreach (var link in Default.SelectNodes("#content DD a"))
        {
            var name = link.Text();
            var href = link.Attr("href");
            cities.Add(new City
            {
                Province = province,
                CityName = name,
                // Resolve the href against the current page URL to get an absolute address.
                CityUrl = new Uri(Default.Url, href).ToString()
            });
        }
        return cities;
    }
    
    /// <summary>
    /// Loads a city's history page and turns every history-record link on it into a WeatherDataSet.
    /// </summary>
    /// <param name="city">City whose CityUrl is opened; stored on every returned WeatherDataSet.</param>
    /// <returns>One WeatherDataSet per anchor matched by "#content>div.pcity a", in match order.</returns>
    public List<WeatherDataSet> GetWeatherDataSet(City city)
    {
        Default.Navigate(city.CityUrl);
        // NOTE(review): Ready presumably blocks until the selector matches - confirm against the SS API.
        Default.Ready("#content>div.pcity");

        var dataSets = new List<WeatherDataSet>();
        foreach (var link in Default.SelectNodes("#content>div.pcity a"))
        {
            var title = link.Text();
            var href = link.Attr("href");
            dataSets.Add(new WeatherDataSet
            {
                Title = title,
                // Resolve the href against the current page URL to get an absolute address.
                Url = new Uri(Default.Url, href).ToString(),
                City = city
            });
        }
        return dataSets;
    }
    
    /// <summary>
    /// Loads a history record page and turns every table row after the first into a WeatherData.
    /// </summary>
    /// <param name="ds">Record whose Url is opened; stored on every returned WeatherData.</param>
    /// <returns>One WeatherData per row matched by "#content>table.b tr:gt(0)", in match order.</returns>
    public List<WeatherData> GetWeatherData(WeatherDataSet ds)
    {
        Default.Navigate(ds.Url);
        // NOTE(review): Ready presumably blocks until the selector matches - confirm against the SS API.
        Default.Ready("#content>table.b");

        var records = new List<WeatherData>();
        foreach (var row in Default.SelectNodes("#content>table.b tr:gt(0)"))
        {
            // Cells are read by position; the text is stored exactly as returned.
            records.Add(new WeatherData
            {
                DataSet = ds,
                Date = row.SelectSingleNode("td:eq(0)").Text(),
                TextWeather = row.SelectSingleNode("td:eq(1)").Text(),
                Temp = row.SelectSingleNode("td:eq(2)").Text(),
                Wind = row.SelectSingleNode("td:eq(3)").Text()
            });
        }
        return records;
    }
    
    /// <summary>
    /// One entry of the site's province-level index (name plus link).
    /// </summary>
    [Serializable]
    public class Province
    {
        /// <summary>Link text of the province entry, as read by GetProvinceList.</summary>
        public string ProvinceName;
        /// <summary>Absolute URL of the province page, resolved against the page it was scraped from.</summary>
        public string ProvinceUrl;
    }
    
    /// <summary>
    /// One city link scraped from a province page.
    /// </summary>
    [Serializable]
    public class City
    {
        /// <summary>The province whose page this city was scraped from.</summary>
        public Province Province;
        /// <summary>Link text of the city entry.</summary>
        public string CityName;
        /// <summary>Absolute URL of the city's history page, resolved against the province page.</summary>
        public string CityUrl;
    }
    
    /// <summary>
    /// One link from a city's history list; following it yields the weather rows of that record.
    /// </summary>
    [Serializable]
    public class WeatherDataSet
    {
        /// <summary>The city whose history page this link was scraped from.</summary>
        public City City;
        /// <summary>Link text of the history entry.</summary>
        public string Title;
        /// <summary>Absolute URL of the history record page, resolved against the city page.</summary>
        public string Url;
    }
    
    /// <summary>
    /// One table row of a history record page. All values are kept as the raw cell text.
    /// </summary>
    [Serializable]
    public class WeatherData
    {
        /// <summary>The history record page this row was scraped from.</summary>
        public WeatherDataSet DataSet;
        /// <summary>Text of the first cell (td index 0).</summary>
        public string Date;
        /// <summary>Text of the second cell (td index 1).</summary>
        public string TextWeather;
        /// <summary>Text of the third cell (td index 2).</summary>
        public string Temp;
        /// <summary>Text of the fourth cell (td index 3).</summary>
        public string Wind;
    }
    View Code

    >> 运行效果:

  • 相关阅读:
    第一次使用博客,有点小激动
    oracle中的分区表基本介绍
    【转】Apache Common HttpClient使用之七种武器
    利用Iterator删除List里相近或相同的对象
    [转]给开发维护大型项目的Java开发者的建议
    JavaScript中String对象的一些方法
    [转] ORA00913: 值过多
    [转]HTTP协议详解
    DOM解析xml
    C# 中的 == 和 .Equals()
  • 原文地址:https://www.cnblogs.com/iamzyf/p/3529460.html
Copyright © 2011-2022 走看看