package com.xzm.util.task; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Timer; import java.util.TimerTask; import java.util.logging.Level; import org.junit.Test; import org.junit.runner.JUnitCore; import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext; import org.springframework.stereotype.Service; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import org.springframework.util.StringUtils; import cn.os.util.QuietCssErrorHandler; import cn.os.util.SilentIncorrectnessListener; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.WebRequest; import com.gargoylesoftware.htmlunit.WebResponse; import com.gargoylesoftware.htmlunit.html.DomNode; import com.gargoylesoftware.htmlunit.html.HtmlElement; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.util.FalsifyingWebConnection; import com.sun.jmx.snmp.tasks.Task; import com.xzm.entity.Qiushibaike; import com.xzm.entity.QiushibaikeAnswer; import com.xzm.service.QiuShiService; @Service @RunWith(SpringJUnit4ClassRunner.class) @ContextConfiguration(locations = { "/com/xzm/config/spring/applicationContext.xml" }) public class InviteEvaluate { private static Logger logger = LoggerFactory .getLogger(InviteEvaluate.class); @Autowired private QiuShiService qiuShiService; public HtmlPage htmlPage; public void getContent() { try { new Thread() { public void run() { JUnitCore.runClasses(new Class[] { InviteEvaluate.class }); } }.start(); } catch (Exception e) { } } public void getContentTest(){ try { htmlPage = getPage(); getQiuContent(htmlPage); } catch (Exception e) { // TODO: handle exception } } //http://bdimg.share.baidu.com/static/js/logger.js?cdnversion=387105#1 /** * 获取正文 * * @throws Exception */ @SuppressWarnings("unchecked") public void getQiuContent(HtmlPage htmlPage) { Qiushibaike qiushibaike = null; List<DomNode> page = (List<DomNode>) htmlPage .getByXPath("//*[@class='block untagged mb15 bs2']"); List<Qiushibaike> qiushibaikes = new ArrayList<Qiushibaike>(); for (int i = 0; i < page.size(); i++) { try { qiushibaike = new Qiushibaike(); String str = page.get(i).asXml().split("\n")[0].split(""")[3];//qiushi_tag_62569450 String id = str.substring(str.lastIndexOf("_")+1, str.length()); if(!StringUtils.isEmpty(id)){ qiushibaike.setQiushiId(Integer.valueOf(id)); } List<DomNode> content = (List<DomNode>) page.get(i).getByXPath( "*[@class='content']"); if (content.size() > 0) { qiushibaike.setContent(content.get(0).asText()); } List<DomNode> image = (List<DomNode>) page.get(i).getByXPath( "*[@class='thumb']/a/img"); if (image.size() > 0) { qiushibaike.setPicUrl(image.get(0).asXml().split(""")[1]); qiushibaike.setTitle(image.get(0).asXml().split(""")[3]); } List<DomNode> down = (List<DomNode>) page.get(i).getByXPath( "*[@class='bar']/ul/li[2]/a"); if (down.size() > 0) { qiushibaike.setDown(Integer.valueOf(down.get(0).asText())); } List<DomNode> up = (List<DomNode>) page.get(i).getByXPath( "*[@class='bar']/ul/li/a"); // if (up.size() > 0) { qiushibaike.setUp(Integer.valueOf(up.get(0).asText())); } List<DomNode> author = (List<DomNode>) page.get(i).getByXPath( "*[@class='author']"); // if (author.size() > 0) { qiushibaike.setAuthor(author.get(0).asText()); } qiushibaike.setCreateTime(new Date()); qiushibaikes.add(qiushibaike); } catch (Exception e) { logger.info("qiushi 正文 error : " ,e); continue ; } } int count = qiuShiService.addQiushi(qiushibaikes); logger.info("当期时间是: " + new Date() + " 正文内容数量是 : " + count); //获取完正文,获取评论 getQiuAnswers(htmlPage); } /** * 糗事回复信息入库 * @throws Exception */ @SuppressWarnings("unchecked") public void getQiuAnswers(HtmlPage htmlPage){ List<HtmlElement> elements = (List<HtmlElement>) htmlPage.getByXPath("//*[@class='qiushi_comments']"); for (HtmlElement htmlElement : elements) { try { HtmlPage resultPage = htmlElement.click(); List<QiushibaikeAnswer> answers = getQiuAnswer(resultPage); int count = qiuShiService.addQiushiAnswer(answers); logger.info("当期时间是: " + new Date() + " 回复内容数量是 : " + count); } catch (Exception e) { continue ; } } } /** * 获取糗事的id * @param resultPage * @return */ public String getQiuId(HtmlPage resultPage){ DomNode head = (DomNode) resultPage.getByXPath("//*[@class='block untagged noline mb15 bs2']").get(0); String str = head.asXml().split("\n")[0].split(""")[3];//qiushi_tag_62569450 return str.substring(str.lastIndexOf("_")+1, str.length()); } /** * 获取糗事的回复内容 * @param resultPage * @return */ @SuppressWarnings("unchecked") public List<QiushibaikeAnswer> getQiuAnswer(HtmlPage resultPage){ List<DomNode> replays = (List<DomNode>) resultPage.getByXPath("//*[@class='replay']"); List<QiushibaikeAnswer> answers = new ArrayList<QiushibaikeAnswer>(); QiushibaikeAnswer answer = null; for (DomNode replay : replays) { try { answer = new QiushibaikeAnswer(); if(!StringUtils.isEmpty(getQiuId(resultPage))){ answer.setContentId(Integer.valueOf(getQiuId(resultPage))); } DomNode ansNameDom = (DomNode) replay.getByXPath("a").get(0); answer.setAnsName(ansNameDom.asText()); DomNode ansContentDom = (DomNode) replay.getByXPath("span").get(0); answer.setAnsContent(ansContentDom.asText()); answers.add(answer); } catch (Exception e) { logger.info("qiushi 回复 error : " ,e); continue ; } } return answers; } /** * 获取主页信息 * * @return * @throws Exception */ public HtmlPage getPage() { try { WebClient client = new WebClient(); //处理异常等信息 client.setIncorrectnessListener(new SilentIncorrectnessListener()); client.getOptions().setJavaScriptEnabled(false); client.setCssErrorHandler(new QuietCssErrorHandler()); client.getOptions().setThrowExceptionOnScriptError(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF); HtmlPage htmlPage = client.getPage("http://www.qiushibaike.com"); return htmlPage;// } catch (Exception e) { return null; } } @Test public void testTransfer() { try { getContentTest(); } catch (Exception e) { e.printStackTrace(); } } public static void main(String[] args) { Timer timer = new Timer(); timer.schedule(new TimerTask() { @Override public void run() { InviteEvaluate evaluate = new InviteEvaluate(); evaluate.getContent(); } },0, 1000*60*5); } }