zoukankan      html  css  js  c++  java
  • java-selenium下载百度图片

    package download;
    
    import java.io.DataInputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.commons.io.input.ReaderInputStream;
    import org.openqa.selenium.By;
    import org.openqa.selenium.WebDriver;
    import org.openqa.selenium.WebElement;
    import org.openqa.selenium.chrome.ChromeDriver;
    
    
    public class Geturl {
    	
    	public static List<String> geturls(String baseUrl){
    		List<String> urllist=new ArrayList<String>();
    		System.setProperty("webdriver.chrome.driver", "E:\\webDriver\\chromedriverV2.28.exe");
    		
    		WebDriver driver = new ChromeDriver();
    		driver.get(baseUrl);
    		
    		//获取所有img标签
    		List<WebElement> imgList = driver.findElements(By.tagName("img"));
    		System.out.println(imgList.size());
    		
    		try {
    			Thread.sleep(10000);
    		} catch (Exception e) {
    			e.printStackTrace();
    		}
    		//便利所有标签
    		try {
    			for (WebElement a : imgList) {
    				//System.out.println(a.getText());
    				System.out.println(a.getAttribute("src"));//获取img标签中的data-imgurl  data-imgurl
    				
    				//获取img标签data-imgurl属性值
    				String urlStr = a.getAttribute("src");
    				/*if(urlStr.contains(".jpg")) {
    					urllist.add(urlStr);
    				}*/
    				
    			}
    		} catch (Exception e) {
    			e.printStackTrace();
    		}
    		
    		
    		return urllist;
    		
    	}
    	
    
    	public static void downloadImg(List<String> urllist) throws Exception{
    		URL url=null;
    		int imageNumber = 0;
    		for(String urlString:urllist) {
    			url = new URL(urlString);
    			DataInputStream dis = new DataInputStream(url.openStream());
    			String imageName ="C:\Users\0\Pictures\插画\photos"+ imageNumber +".jpg";
    			FileOutputStream fos =new FileOutputStream(new File(imageName));
    			
    			byte[] buffer = new byte[1024];
    			int length;
    			while((length = dis.read(buffer))>0) {
    				fos.write(buffer, 0, length);
    			}
    			dis.close();
    			fos.close();
    			imageNumber++;
    		}
    	}
    	
    	
    	
    	public static void main(String[] args) {
    		
    		List<String> urllist=geturls("https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%B2%E5%BB%AD&fr=ala&ala=1&alatpl=adress&pos=0&hs=2&xthttps=111111");
    		System.out.println(urllist.size());
    		
    		try {
    			downloadImg(urllist);
    		} catch (Exception e) {
    			// TODO Auto-generated catch block
    			e.printStackTrace();
    		}
    		
    	}
    }
    

      

  • 相关阅读:
    dell 服务器服务编码查询方法(Win & linux)
    English Voice of <<Wish You Were Here>>
    V3
    研究2张物理网卡 1台物理服务器 3个光猫 实现的离线下载服务器微架构 (3 光猫)
    注解
    单例模式
    线程同步锁
    线程实现的两种方式
    多线程简介
    Map接口
  • 原文地址:https://www.cnblogs.com/sincoolvip/p/7676950.html
Copyright © 2011-2022 走看看