以前一直觉得AC自动机很神奇,总觉得是不是就是自动AC题目的算法
后来学到了才知道是字符串匹配的一个算法
但是还是偶尔去忽悠忽悠小学弟
这次写的这个其实很简单,写得也比较糙
实际上就是模拟提交
不会HTML,所以一边做一边查各个标签是干嘛的
实际这个小项目就是
1.从POJ爬取题目链接
2.从博客园爬取代码
3.模拟提交
4.判断题目是否AC,如果AC下一题,没有就寻找下一个代码
主要也就是用到了Jsoup,还是挺有意思的一个东西
但是也还是很有问题,就是关于代码提取的问题
因为博客园上各篇博客贴代码的格式不一样,提取出来的代码提交上去很有可能会CE(编译错误)
以后再改进这个地方,尽量使AC率高些
package locy;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.Base64;
import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
// NOTE(review): unused, and the org.omg.CORBA packages were removed from the JDK in Java 11;
// kept only because the original file imports it. Drop it when building on a modern JDK.
import org.omg.CORBA.portable.InputStream;

/**
 * Crawls the POJ problem list, searches cnblogs for posted solutions to each problem,
 * submits the extracted code to POJ and checks the status page for an "Accepted" verdict.
 *
 * <p>Known limitation: the code is taken from {@code Elements.text()}, and the line-number
 * stripping in {@link #push} is a plain regex, so differently formatted blog posts can
 * produce sources that do not compile.
 */
public class Main {

    /** URLs made only of lower-case letters, digits and {@code / : . ? =}. */
    private static final Pattern SIMPLE_URL = Pattern.compile("[a-z/:.?0-9=]+");

    /** Link text consisting of digits only (used to pick the volume links on the problem list). */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** A run of digits followed by one space: the line numbers blog code listings carry. */
    private static final Pattern LINE_NUMBER = Pattern.compile("[0-9]+[ ]");

    /** Length of {@code http://poj.org/problem?id=NNNN}, the only links treated as problems. */
    private static final int PROBLEM_URL_LENGTH = 30;

    private static final String MAIN_SIGNATURE = "int main()";
    private static final String RETURN_STATEMENT = "return 0;";
    private static final String SESSION_COOKIE = "JSESSIONID";

    // SECURITY NOTE(review): credentials should not live in source control. They can be
    // overridden with -Dpoj.user=... / -Dpoj.password=...; the literals remain only as
    // backward-compatible defaults.
    private static final String USER_ID = setting("poj.user", "flaseacmachine");
    private static final String PASSWORD = setting("poj.password", "13341352708");

    /**
     * Entry point: collects the volume pages linked from the POJ problem list, logs in,
     * then processes every problem found on each volume page.
     *
     * @param args unused
     * @throws IOException if the problem list cannot be fetched or the login fails
     * @throws EncoderException declared for compatibility with the original signature
     * @throws InterruptedException if the thread is interrupted while waiting for a verdict
     */
    public static void main(String[] args) throws IOException, EncoderException, InterruptedException {
        List<String> volumeUrls = new ArrayList<>();
        Document doc = Jsoup.connect("http://poj.org/problemlist").get();
        for (Element link : doc.select("a[href]")) {
            String href = link.attr("abs:href");
            boolean isVolumeLink = SIMPLE_URL.matcher(href).matches()
                    && DIGITS.matcher(link.text()).matches();
            // The same link may appear more than once on the page; visit each volume only once.
            if (isVolumeLink && !volumeUrls.contains(href)) {
                volumeUrls.add(href);
            }
        }

        Map<Integer, String> num = new HashMap<>();
        Map<Integer, String> name = new HashMap<>();
        String cookie = getCookie();
        for (String volumeUrl : volumeUrls) {
            FindProblem(volumeUrl, name, num, cookie);
        }
    }

    /**
     * Builds the cnblogs search URL used to look for solutions to a problem.
     *
     * @param id POJ problem id
     * @return the search page URL for the keywords {@code poj <id>}
     */
    public static String getSearchDate(Integer id) {
        return "http://zzk.cnblogs.com/s/blogpost?Keywords=poj+" + id;
    }

    /**
     * Scans one problem-list page and, for every problem link on it, tries to get a
     * solution accepted. Prints {@code <id>:true} or {@code <id>:false} per problem.
     *
     * @param url     problem-list (volume) page to scan
     * @param nameurl unused; kept for interface compatibility
     * @param num     receives {@code id -> problem URL} for every attempted problem; ids
     *                already present are skipped so a problem is not submitted twice
     * @param coocie  POJ session id used for submitting
     * @throws InterruptedException if interrupted while waiting for a verdict
     */
    public static void FindProblem(String url, Map<Integer, String> nameurl, Map<Integer, String> num,
            String coocie) throws InterruptedException {
        try {
            Document doc = Jsoup.connect(url).get();
            for (Element link : doc.select("a[href]")) {
                String href = link.attr("abs:href");
                if (href.length() != PROBLEM_URL_LENGTH || !SIMPLE_URL.matcher(href).matches()) {
                    continue;
                }
                Integer id;
                try {
                    id = Integer.valueOf(href.substring(href.length() - 4));
                } catch (NumberFormatException notAProblemLink) {
                    // A 30-character link that does not end in a 4-digit id is not a problem page.
                    continue;
                }
                if (num.containsKey(id)) {
                    continue;
                }
                boolean accepted = getDate(getSearchDate(id), "poj " + id, coocie, id);
                System.out.println(id + (accepted ? ":true" : ":false"));
                num.put(id, href);
            }
        } catch (IOException e) {
            System.err.println("Failed to read problem list page " + url);
            e.printStackTrace();
        }
    }

    /**
     * Walks the search results at {@code url}, and for each linked {@code .html} post
     * extracts the text of its {@code div.cnblogs_code} blocks (from the first {@code #}
     * onward), submits it, waits two seconds and checks the status page.
     *
     * @param url    search result page
     * @param name   unused; kept for interface compatibility
     * @param cookie POJ session id used for submitting
     * @param id     POJ problem id
     * @return {@code true} as soon as one submission shows up as Accepted, otherwise {@code false}
     * @throws InterruptedException if interrupted while waiting for a verdict
     */
    public static boolean getDate(String url, String name, String cookie, Integer id)
            throws InterruptedException {
        Document results;
        try {
            results = Jsoup.connect(url).get();
        } catch (IOException e) {
            System.err.println("Failed to read search page " + url);
            e.printStackTrace();
            return false;
        }

        for (Element link : results.select("a[href]")) {
            String postUrl = link.attr("abs:href");
            if (!postUrl.endsWith(".html")) {
                continue;
            }
            System.out.println(postUrl);
            try {
                // NOTE(review): text() flattens the listing's whitespace, which is a likely
                // source of compile errors for sources that rely on line breaks — confirm.
                String text = Jsoup.connect(postUrl).get().select("div.cnblogs_code").text();
                int start = text.indexOf('#');
                if (start == -1) {
                    continue;
                }
                push(cookie, text.substring(start), id);
                Thread.sleep(2000);
                if (getStatu(id.toString())) {
                    return true;
                }
            } catch (IOException e) {
                // One unreachable post or failed submission must not end the search;
                // move on to the next candidate.
                System.err.println("Skipping " + postUrl);
                e.printStackTrace();
            } catch (EncoderException e) {
                System.out.println("123");
                e.printStackTrace();
            }
        }
        return false;
    }

    /**
     * Checks the public POJ status page for an Accepted run of the given problem by the
     * configured user.
     *
     * @param id problem id as text
     * @return {@code true} if a matching Accepted row is present, {@code false} otherwise
     *         (including when the status page cannot be read)
     */
    public static boolean getStatu(String id) {
        try {
            Document doc = Jsoup.connect("http://poj.org/status").get();
            for (Element row : doc.select("tr")) {
                String text = row.text();
                // Only rows whose first column is exactly 8 characters wide are considered.
                if (text.indexOf(' ') != 8) {
                    continue;
                }
                String[] columns = text.split(" ");
                if (columns.length < 4) {
                    continue;
                }
                System.out.println(columns[3]);
                System.out.println(text);
                if (columns[1].equals(USER_ID) && columns[3].equals("Accepted") && columns[2].equals(id)) {
                    return true;
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to read the POJ status page");
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Cleans up scraped source text and submits it to POJ for the given problem.
     *
     * @param cookie POJ session id
     * @param a      raw source text scraped from a blog post
     * @param id     POJ problem id
     * @throws IOException if the submit request fails
     * @throws EncoderException declared for compatibility with the original signature
     */
    public static void push(String cookie, String a, Integer id) throws IOException, EncoderException {
        System.out.println("sdafasdf");
        String source = prepareSource(a);
        Jsoup.connect("http://poj.org/submit")
                .data("problem_id", id.toString())
                .data("language", "4")
                .data("source", getString(source))
                .data("submit", "Submit")
                .data("encoded", "1")
                .cookie(SESSION_COOKIE, cookie)
                .method(Method.POST)
                .execute();
    }

    /**
     * Strips listing line numbers and cuts everything after the {@code return 0;} that ends
     * {@code main}, re-adding the closing brace. If no {@code return 0;} can be located the
     * cleaned text is returned untruncated instead of being cut at an invalid offset.
     */
    private static String prepareSource(String raw) {
        // Heuristic: also removes legitimate "digits + space" sequences inside the code.
        String code = LINE_NUMBER.matcher(raw).replaceAll("").trim();
        int mainPos = code.indexOf(MAIN_SIGNATURE);
        // Without a recognisable main(), the last "return 0;" is the best guess for its end.
        int returnPos = mainPos >= 0
                ? code.indexOf(RETURN_STATEMENT, mainPos)
                : code.lastIndexOf(RETURN_STATEMENT);
        if (returnPos < 0) {
            return code;
        }
        return code.substring(0, returnPos + RETURN_STATEMENT.length()) + "}";
    }

    /**
     * Logs in to POJ and returns the session id issued by the server.
     *
     * @return the value of the {@code JSESSIONID} cookie
     * @throws IOException if the request fails or the response carries no session cookie
     */
    public static String getCookie() throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL("http://poj.org/login").openConnection();
        try {
            connection.setDoOutput(true);
            String form = "user_id1=" + URLEncoder.encode(USER_ID, "UTF-8")
                    + "&password1=" + URLEncoder.encode(PASSWORD, "UTF-8")
                    + "&B1=login";
            try (OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "UTF-8")) {
                out.write(form);
            }
            String setCookie = connection.getHeaderField("Set-Cookie");
            if (setCookie == null) {
                throw new IOException("POJ login response carried no Set-Cookie header");
            }
            return extractSessionId(setCookie);
        } finally {
            connection.disconnect();
        }
    }

    /** Returns the JSESSIONID value from a Set-Cookie header, up to the first ';' if any. */
    private static String extractSessionId(String setCookie) throws IOException {
        String marker = SESSION_COOKIE + "=";
        int start = setCookie.indexOf(marker);
        if (start < 0) {
            // The header value is deliberately not echoed: it may hold a session secret.
            throw new IOException("POJ login response did not set a " + SESSION_COOKIE + " cookie");
        }
        start += marker.length();
        int end = setCookie.indexOf(';', start);
        return end < 0 ? setCookie.substring(start) : setCookie.substring(start, end);
    }

    /**
     * Base64-encodes source code for the {@code encoded=1} submit form.
     *
     * @param x text to encode
     * @return Base64 of the UTF-8 bytes of {@code x}; UTF-8 is fixed so the payload does not
     *         depend on the platform default charset
     */
    public static String getString(String x) {
        return new String(Base64.encodeBase64(x.getBytes(StandardCharsets.UTF_8)), StandardCharsets.US_ASCII);
    }

    /** Reads a system property, falling back to {@code fallback} when unset or empty. */
    private static String setting(String key, String fallback) {
        String value = System.getProperty(key);
        return (value == null || value.isEmpty()) ? fallback : value;
    }
}