zoukankan      html  css  js  c++  java
  • Java 爬虫

    import java.io.BufferedReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.PrintWriter;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
     * Minimal single-page link harvester.
     *
     * <p>Downloads one fixed web page, scans it line by line for absolute
     * {@code http://} links and writes every match, one per line, to a fixed
     * output file. It does not follow the links it finds.
     */
    public class WebSpider {

        /** Page whose source is scanned for links. */
        private static final String START_URL = "http://www.4399.com/";

        /** File that receives the collected links; it is overwritten on every run. */
        private static final String OUTPUT_FILE = "e:/url.txt";

        /**
         * Matches {@code http://} followed by one or more characters from the set
         * {word character, '+', '.', '?', '/'} and ending in a dot plus ASCII letters.
         *
         * <p>Because the match must end in ".letters", a trailing slash or anything
         * after the last such suffix is not part of the captured link. Links using
         * {@code https://} are not matched. Compiled once because {@link Pattern}
         * is immutable and reusable.
         */
        private static final Pattern LINK_PATTERN =
                Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

        /**
         * Fetches {@link #START_URL}, extracts every link from each line and writes
         * the links to {@link #OUTPUT_FILE}. Prints a success message on completion;
         * on failure the exception's stack trace is printed instead.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            try {
                URLConnection connection = new URL(START_URL).openConnection();
                // try-with-resources closes both streams even when opening the second
                // one fails, and never dereferences a resource that was not opened.
                // The links are collected in the text file named by OUTPUT_FILE.
                try (PrintWriter out = new PrintWriter(new FileWriter(OUTPUT_FILE), true);
                     BufferedReader in = new BufferedReader(
                             new InputStreamReader(connection.getInputStream()))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        for (String link : extractUrls(line)) {
                            out.println(link);
                        }
                    }
                    // PrintWriter never throws on write errors; ask it explicitly so a
                    // failed write is not reported as success.
                    if (out.checkError()) {
                        throw new IOException("Failed writing links to " + OUTPUT_FILE);
                    }
                }
                System.out.println("获取成功!");
            } catch (MalformedURLException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Returns every substring of {@code line} that matches {@link #LINK_PATTERN},
         * in order of appearance.
         *
         * @param line one line of page source; {@code null} is treated as empty
         * @return the matched links, or an empty list when there are none
         */
        static List<String> extractUrls(String line) {
            List<String> links = new ArrayList<String>();
            if (line == null) {
                return links;
            }
            Matcher matcher = LINK_PATTERN.matcher(line);
            while (matcher.find()) {
                links.add(matcher.group());
            }
            return links;
        }
    }

    http://www.cnblogs.com/huangwentian/p/6484534.html

  • 相关阅读:
    系统设计5:Google三剑客
    lintcode亚麻九题
    设计模式17:单例模式
    设计模式16:迭代器模式
    设计模式15:组合模式
    476. Number Complement
    561. Array Partition I
    627. Swap Salary
    617. Merge Two Binary Trees
    728. Self Dividing Numbers
  • 原文地址:https://www.cnblogs.com/XJJD/p/7070514.html
Copyright © 2011-2022 走看看