zoukankan      html  css  js  c++  java
  • Jsoup获取DOM元素

    (1)doc.getElementsByTag(String tagName);

    (2)doc.getElementById(String id);

    (3)doc.getElementsByClass(String className);

    (4)doc.getElementsByAttribute(String key);

    // Collect every element that carries a "width" attribute and print each one.
    elements=document.getElementsByAttribute("width");
    for(int i=0;i<elements.size();i++){
    	Element withWidth=elements.get(i);
    	System.out.println(withWidth.toString());
    }
    

      

    (5)doc.getElementsByAttributeValue(String key,String value);

    示例:通过key-value查找src="/images/logo_small.gif"的元素

    		// Look up DOM elements by attribute key/value (here: src="/images/logo_small.gif").
    		elements=document.getElementsByAttributeValue("src", "/images/logo_small.gif");
    		// get(0) throws IndexOutOfBoundsException when nothing matched, so check first.
    		if(!elements.isEmpty()){
    			System.out.println(elements.get(0).toString());
    		}
    

    示例:通过key-value查找target="_blank"的元素

    		// Print every element whose target attribute equals "_blank".
    		elements=document.getElementsByAttributeValue("target","_blank");
    		for(int i=0;i<elements.size();i++){
    			Element blankTarget=elements.get(i);
    			System.out.println(blankTarget.toString());
    		}
    

      

     使用document.select()选择元素

    通过class一级一级往下找

    package com.oracle.zibo;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpHost;
    import org.apache.http.client.config.RequestConfig;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.util.EntityUtils;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * Demo: downloads a web page with Apache HttpClient, parses it with jsoup and
     * prints the text of every element matched by a descendant CSS selector.
     */
    public class Demo2 {
    
    	/**
    	 * Fetches http://www.bootcss.com/, parses the HTML and prints the text of each
    	 * element matching {@code .row .thumbnail .caption h3 a}.
    	 *
    	 * @param args unused
    	 * @throws Exception if the HTTP request or reading the response body fails
    	 */
    	public static void main(String[] args) throws Exception {
    		HttpGet httpGet=new HttpGet("http://www.bootcss.com/");
    		String str;
    		
    		// try-with-resources closes the response first and then the client, even
    		// when execute() or EntityUtils.toString() throws.
    		try(CloseableHttpClient closeableHttpClient=HttpClients.createDefault();
    				CloseableHttpResponse closeableHttpResponse=closeableHttpClient.execute(httpGet)){
    			HttpEntity httpEntity=closeableHttpResponse.getEntity(); // response entity (the page content)
    			str=EntityUtils.toString(httpEntity, "utf-8");
    		}
    		
    		Document document=Jsoup.parse(str); // parse the page
    		
    		// Find all titles on the bootstrap home page
    		Elements elements=document.select(".row .thumbnail .caption h3 a");
    		for(Element e:elements){
    			System.out.println(e.text());
    		}
    	}
    
    }
    

      

    使用"a[href]"

    查找所有带href属性的a标签

    		// Select every <a> tag that has an href attribute and print its inner HTML.
    		Elements elements=document.select("a[href]");
    		for(Element link:elements){
    			String innerHtml=link.html();
    			System.out.println(innerHtml);
    		}
    

    使用"img[src$=.png]"

    查找扩展名为.png的图片的元素

    		// Select <img> elements whose src attribute ends with ".png" and print each one.
    		Elements elements=document.select("img[src$=.png]");
    		for(int i=0;i<elements.size();i++){
    			Element pngImage=elements.get(i);
    			System.out.println(pngImage.toString());
    		}
    

      

    取得我们需要的信息

    		// For each matched <img>, print the element itself, then its text, its inner
    		// HTML and the value of its src attribute.
    		Elements elements=document.select("img[src$=.png]");
    		for(Element img:elements){
    			System.out.println(img.toString());
    			String text=img.text(); // content inside the tag
    			System.out.println(text);
    			String innerHtml=img.html(); // HTML code inside the tag
    			System.out.println(innerHtml);
    			String src=img.attr("src"); // value of the given attribute
    			System.out.println(src);
    		}

    e.attr(属性),返回属性值

    .first()取得第一个

    .last()取得最后一个

    // first() returns null when the selector matches nothing, so check before dereferencing.
    Element element=document.select("img[src$=.gif]").first();
    if(element!=null){
    	System.out.println(element.attr("src")); // value of the src attribute
    }
    

      

  • 相关阅读:
    opencv+python实时人脸检测、磨皮
    opencv人脸检测
    均值模糊、中值模糊、自定义模糊
    双边滤波
    表面模糊
    水纹滤镜
    爬取https网站
    字符串、数组、切片、map
    tcpdump抓包和Wireshark解包
    iptables详解
  • 原文地址:https://www.cnblogs.com/mengxinrenyu/p/7635492.html
Copyright © 2011-2022 走看看