zoukankan      html  css  js  c++  java
  • java爬虫

    import java.io.BufferedReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.PrintWriter;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
     * Minimal single-page link collector.
     *
     * <p>Downloads one web page, scans it line by line for {@code http://} links and
     * writes every match, one per line, to a text file.
     *
     * <p>Usage: {@code java WebSpider [pageUrl [outputFile]]}. With no arguments the
     * page {@code http://www.4399.com/} is fetched and the links are stored in
     * {@code e:/url.txt} (a file named "url.txt" on the E: drive).
     */
    public class WebSpider {

        /**
         * Pattern for an absolute {@code http://} link ending in a dot followed by
         * letters (for example {@code .com} or {@code .html}).
         *
         * <p>Inside the character class {@code +}, {@code ?} and {@code .} are plain
         * literals, so the class accepts word characters, '+', '.', '?' and '/'.
         * The backslashes are doubled because a bare {@code \w} or {@code \.} is not
         * a legal escape sequence in a Java string literal.
         */
        static final Pattern LINK_PATTERN = Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

        /** Page fetched when no URL is supplied on the command line. */
        private static final String DEFAULT_PAGE = "http://www.4399.com/";

        /** File the links are written to when no output path is supplied. */
        private static final String DEFAULT_OUTPUT = "e:/url.txt";

        /**
         * Program entry point.
         *
         * @param args optional; {@code args[0]} overrides the page URL and
         *             {@code args[1]} overrides the output file path
         */
        public static void main(String[] args) {
            String page = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;
            String output = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
            try {
                crawl(page, output);
                System.out.println("获取成功!");
            } catch (MalformedURLException e) {
                System.err.println("Invalid page URL: " + page);
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("Failed to collect links from " + page + " into " + output);
                e.printStackTrace();
            }
        }

        /**
         * Fetches {@code page} and stores every link found in it in {@code output}.
         *
         * @param page   URL of the page to download
         * @param output path of the text file that receives one link per line
         * @throws MalformedURLException if {@code page} is not a valid URL
         * @throws IOException           if the page cannot be read or the file cannot be written
         */
        private static void crawl(String page, String output) throws IOException {
            URLConnection connection = new URL(page).openConnection();
            // The reader is opened first so that a failed download does not truncate
            // an existing output file. try-with-resources closes both streams even
            // when one of them could not be opened, which avoids the
            // NullPointerException a manual finally block would raise in that case.
            try (BufferedReader in =
                         new BufferedReader(new InputStreamReader(connection.getInputStream()));
                 PrintWriter out = new PrintWriter(new FileWriter(output), true)) {
                String line;
                while ((line = in.readLine()) != null) {
                    writeLinks(line, out);
                }
                // PrintWriter never throws on write failures; it only records them.
                if (out.checkError()) {
                    throw new IOException("Could not write links to " + output);
                }
            }
        }

        /**
         * Prints every link found in {@code line} to {@code out}, one per line.
         *
         * @param line text to scan for links
         * @param out  destination for the matched links
         */
        static void writeLinks(String line, PrintWriter out) {
            Matcher links = LINK_PATTERN.matcher(line);
            while (links.find()) {
                out.println(links.group());
            }
        }
    }

    http://www.cnblogs.com/huangwentian/p/6484534.html

  • 相关阅读:
    webpack--前端自动化工具
    Vue--入门篇
    集千篇理论,终得深拷贝与浅拷贝的初解
    事件循环--eventloop
    对象的属性(变量+对象)
    集千篇理论精华,感悟对同步和异步的理解
    vue--先决篇
    js的基本语法规范
    python 模块加载错误总结
    Python logging模块无法正常输出日志
  • 原文地址:https://www.cnblogs.com/XJJD/p/7070514.html
Copyright © 2011-2022 走看看