zoukankan      html  css  js  c++  java
  • Jsoup解析HTML

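    1 在解析HTML之前需先导入jsoup-1.10.2.jar;下面的代码还依赖log4j 1.x(日志)以及一个自定义的Weather实体类(包含area、airTemperature、rainFall、relativeWet、windPower、windDirection、date等属性及其getter/setter)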

    2 解析HTML,代码如下:

    package com.od.cn;
    
    import java.io.BufferedWriter;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    
    import org.apache.log4j.Logger;
    import org.apache.log4j.PropertyConfigurator;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    public class JsoupParserHtml {
    	private static final Logger LOGGER=Logger.getLogger(JsoupParserHtml.class);
    	
    	//从网上把天气爬下来
    	private List<Weather> parserHtmlByHttp(String url){
    		List<Weather> weathers=new ArrayList<Weather>();
    		try {
    			Document document=Jsoup.connect(url).get();
    			Elements classes=document.getElementsByClass("part_se");
    			for(Element ele:classes){
    				String data_role=ele.attr("data-role");
    				if("collapsible".equals(data_role)){
    					Elements h1=ele.select("h1");
    					Elements td=ele.select("td");
    					Weather weather=new Weather();
    					weather.setArea(h1.text());
    					weather.setAirTemperature(td.get(1).text());
    					weather.setRainFall(td.get(3).text());
    					weather.setRelativeWet(td.get(5).text());
    					weather.setWindPower(td.get(7).text());
    					weather.setWindDirection(td.get(9).text());
    					weather.setDate(td.get(11).text());
    					weathers.add(weather);
    				}
    			}
    		} catch (IOException e) {
    			LOGGER.error("解析网页异常:"+e.getMessage());
    		}
    		LOGGER.info("成功获取网页数据");
    		return weathers;
    	}
    	
    	//以json的格式保存到文本中
    	private void saveFile(List<Weather> weathers){
    		if(weathers!=null){
    			SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd");
    			StringBuffer buffer=new StringBuffer();
    			buffer.append("{date:""+sdf.format(new Date())+"",data[");
    			for(int i=0;i<weathers.size();i++){
    				Weather weather=weathers.get(i);
    				if(i==weathers.size()-1){
    					buffer.append("{area:""+weather.getArea()+"",airTemperature:""+weather.getAirTemperature()+"",rainFall:""+weather.getRainFall()+
    							"",relativeWet:""+weather.getRelativeWet()+"",windPower:""+weather.getWindPower()+"",windDirection:""+weather.getWindDirection()+"",dateTime:""+weather.getDate()+""}");
    					
    				}else{
    					buffer.append("{area:""+weather.getArea()+"",airTemperature:""+weather.getAirTemperature()+"",rainFall:""+weather.getRainFall()+
    							"",relativeWet:""+weather.getRelativeWet()+"",windPower:""+weather.getWindPower()+"",windDirection:""+weather.getWindDirection()+"",dateTime:""+weather.getDate()+""},");
    				}
    			}
    			buffer.append("]}");
    			BufferedWriter bw=null;
    			try {
    				 bw=new BufferedWriter(new FileWriter("d:\weather.txt"));
    				bw.write(buffer.toString());
    				bw.flush();
    				LOGGER.info("已保存文件");
    			} catch (IOException e) {
    				LOGGER.error("保存文件异常:"+e.getMessage());
    			}finally{
    				if(bw!=null){
    					try {
    						bw.close();
    					} catch (IOException e) {
    						LOGGER.error("关闭流异常:"+e.getMessage());
    					}
    				}
    			}
    		}
    	}
    	
    	public static void main(String[] args) {
    		PropertyConfigurator.configure("WebRoot/conf/log4j.properties");
    		LOGGER.info("启动程序");
    		JsoupParserHtml jph=new JsoupParserHtml();
    		List<Weather> weathers=jph.parserHtmlByHttp("http://www.zhpmsc.org.cn/WeChat/monitorController/zoneSk?winzoom=1#");
    		jph.saveFile(weathers);
    		LOGGER.info("程序结束");
    	}
    
    }
    


  • 相关阅读:
    致初学者:PHP比ASP优秀的七个理由
    有情人终成眷属为好友hualex2006.12.9结婚祝福
    有情人终成眷属为好友hualex2006.12.9结婚祝福
    各种查找算法效率比较
    实习三 树、二叉树及其应用 (题目:唯一地确定一棵二叉树 )
    hdu 2188 选拔志愿者(博弈)
    hdu 1050Moving Tables(贪心)
    实习一 线性表及其应用 (题目:一元稀疏多项式的加法运算 )
    实习六 农夫过河问题
    实习二 栈、队列和递归算法设计 (题目:停车场管理 )
  • 原文地址:https://www.cnblogs.com/t0404/p/10290972.html
Copyright © 2011-2022 走看看