Selenium WebDriver 用于模拟浏览器的操作,既可以用来做网站测试,也可以用来做爬虫(crawler)。我是用 Eclipse 开发的,需要先导入 selenium-server-standalone-***.jar(Right click project --> Properties --> Java Build Path --> Libraries --> Add External JARs...)。这个包可以在 Selenium 官网下载。
下面的代码演示如何与一个网站做简单的交互:
1 public class IndeedJobSearch { 2 3 public static void main(String[] args) throws InterruptedException { 4 // TODO Auto-generated method stub 5 6 //Create firefox driver to drive the browser 7 //File pathBinary = new File("D:\Programes\tools\FireFox\firefox.exe"); 8 //FirefoxBinary Binary = new FirefoxBinary(pathBinary); 9 //FirefoxProfile firefoxPro = new FirefoxProfile(); 10 //WebDriver driver = new FirefoxDriver(Binary, firefoxPro); 11 //ChromeDriver or ie 12 System.setProperty("webdriver.chrome.driver",//webdriver.ie.driver 13 "D:\Projects\JavaWorkspace\ThirdPartyLibs\WebDriver\chromedriver.exe");//IEDriverServer.exe 14 WebDriver driver = new ChromeDriver(); //InternetExplorerDriver 15 16 //Open Indeed home page 17 driver.get("http://www.indeed.hk/"); 18 //Find what field and enter Selenium 19 Thread.sleep(2000); 20 driver.findElement(By.id("what")).sendKeys("Selenium"); //"findElements" will return all 21 //Find location field and enter London 22 driver.findElement(By.id("where")).clear(); 23 Thread.sleep(2000); 24 driver.findElement(By.id("where")).sendKeys("Hong Kong"); 25 //Find FindJobs button and click on it 26 Thread.sleep(2000); 27 driver.findElement(By.id("fj")).click(); 28 //From job search results page, get page title and jobs count message 29 30 System.out.println(driver.getTitle()); 31 System.out.println(driver.findElement(By.id("searchCount")).getText()); 32 33 driver.close(); 34 } 35 36 }
要抓取数据,就要先定位页面上的元素。下面是常用的元素定位方法:
1 public class LocatingStrategies { 2 3 public static void main(String[] args) throws InterruptedException { 4 // TODO Auto-generated method stub 5 6 //Create firefox driver to drive the browser 7 File pathBinary = new File("D:\Programes\tools\FireFox\firefox.exe"); 8 FirefoxBinary Binary = new FirefoxBinary(pathBinary); 9 FirefoxProfile firefoxPro = new FirefoxProfile(); 10 WebDriver driver = new FirefoxDriver(Binary, firefoxPro); 11 12 //Open Indeed home page 13 driver.get("http://www.indeed.hk/"); 14 15 //Locating by ID 16 //driver.findElement(By.id("what")).sendKeys("Selenium"); //"findElements" will return all 17 18 //Locating by name 19 //driver.findElement(By.name("q")).sendKeys("Selenium"); 20 21 //Locating by LinkText 22 //driver.findElement(By.linkText("建立個人檔案")).click(); 23 24 //Locating by partialLinkText 25 //driver.findElement(By.partialLinkText("招聘廣告")).click(); 26 27 //Locating by Xpath By.xpath("//input[@placeholder='email']")).sendKeys("User NAme"); 28 // System.out.println( 29 // 30 // driver.findElement(By.xpath("//img[@title='Indeed 香港']")) 31 // .getAttribute("src") 32 // 33 // ); 34 35 //Locating by CssSelector By.cssSelector("css=input.input_submit")).click(); 36 //driver.findElement(By.cssSelector("input.input_submit")).click(); 37 38 //Locating by Tagname 39 System.out.println( 40 41 driver.findElements(By.tagName("a")).size() 42 43 ); 44 45 //Locating by ClassName 46 System.out.println( 47 48 driver.findElements(By.className("input_text")).size() 49 50 ); 51 driver.findElement(By.className("input_text")).sendKeys("Selenium"); 52 } 53 54 }
下面的代码演示如何处理 JavaScript 弹窗(alert、prompt、confirm box)的交互:
1 public class JavaScriptPopups { 2 3 public static void main(String[] args) throws InterruptedException { 4 System.setProperty("webdriver.chrome.driver", 5 "D:\Projects\JavaWorkspace\ThirdPartyLibs\WebDriver\chromedriver.exe");//IEDriverServer.exe 6 WebDriver driver = new ChromeDriver(); 7 8 driver.get("http://test1.absofttrainings.com/javascript-alert-confirm-prompt-boxes/"); 9 System.out.println( 10 11 driver.findElements(By.className("headerText")).size() 12 13 ); 14 //Thread.sleep(6000); 15 driver.findElement(By.xpath("//button[text()='Display Alert Box']")).click(); 16 17 Alert alert = driver.switchTo().alert(); 18 System.out.println( 19 "Alert text: " + alert.getText() 20 ); 21 Thread.sleep(2000); 22 alert.accept(); 23 driver.switchTo().defaultContent(); 24 System.out.println(driver.findElement(By.id("confirm")).getText() 25 ); 26 driver.findElement(By.id("prompt")).click(); 27 alert = driver.switchTo().alert(); 28 Thread.sleep(3000); 29 alert.sendKeys("Bruce"); 30 Thread.sleep(3000); 31 alert.accept(); 32 driver.switchTo().defaultContent(); 33 34 } 35 }