zoukankan      html  css  js  c++  java
  • Java 爬虫

    import java.io.BufferedReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.PrintWriter;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
     * Minimal single-page link harvester.
     *
     * <p>Downloads one fixed web page, scans it line by line for {@code http://} links
     * and writes every match, one per line, to a fixed output file.
     */
    public class WebSpider {

        /** Page whose content is scanned for links. */
        private static final String START_URL = "http://www.4399.com/";

        /** The collected links are stored in a text file named "url" on drive E. */
        private static final String OUTPUT_FILE = "e:/url.txt";

        /**
         * Link pattern, compiled once. Backslashes are doubled because this is a Java
         * string literal; a single {@code \w} or {@code \.} is an illegal escape sequence
         * and does not compile.
         */
        private static final Pattern LINK_PATTERN =
                Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

        /**
         * Fetches {@link #START_URL} and writes every link found to {@link #OUTPUT_FILE}.
         * Failures are reported on standard error instead of being propagated.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            try {
                URLConnection connection = new URL(START_URL).openConnection();
                // try-with-resources closes whichever resources were actually opened,
                // so a failure while opening one of them cannot cause a
                // NullPointerException during cleanup.
                try (PrintWriter out = new PrintWriter(new FileWriter(OUTPUT_FILE), true);
                     BufferedReader in = new BufferedReader(
                             new InputStreamReader(connection.getInputStream()))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        printLinks(line, out);
                    }
                    // PrintWriter never throws on write failure; it only records it.
                    if (out.checkError()) {
                        throw new IOException("Failed writing links to " + OUTPUT_FILE);
                    }
                    System.out.println("获取成功!");
                }
            } catch (IOException e) {
                // MalformedURLException is an IOException, so it is handled here as well.
                System.err.println("Could not collect links from " + START_URL + ": " + e);
            }
        }

        /**
         * Writes every link found in {@code line} to {@code out}, one per line,
         * in order of appearance.
         *
         * <p>Package-private so it can be exercised without network or disk access.
         *
         * @param line one line of page text to scan
         * @param out  destination for the matched links
         */
        static void printLinks(String line, PrintWriter out) {
            Matcher matcher = LINK_PATTERN.matcher(line);
            while (matcher.find()) {
                out.println(matcher.group());
            }
        }
    }

    http://www.cnblogs.com/huangwentian/p/6484534.html

  • 相关阅读:
    C语言I博客作业06
    C语言I博客作业05
    C语言I博客作业04
    C语言II博客作业04
    C语言II博客作业03
    C语言II博客作业02
    C语言II博客作业01
    期末总结
    第一次作业
    C语言I博客作业08
  • 原文地址:https://www.cnblogs.com/XJJD/p/7070514.html
Copyright © 2011-2022 走看看