zoukankan      html  css  js  c++  java
  • java爬虫

    import java.io.BufferedReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.PrintWriter;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.nio.charset.StandardCharsets;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
     * Minimal link harvester: downloads one web page, finds every {@code http://}
     * link in it line by line, and writes each match on its own line to a text file.
     *
     * <p>Usage: {@code java WebSpider [pageUrl [outputFile]]}. With no arguments the
     * page {@code http://www.4399.com/} is fetched and the links are stored in
     * {@code e:/url.txt} (a file named "url.txt" on the E: drive).
     */
    public class WebSpider {

        /** Page fetched when no URL is given on the command line. */
        private static final String DEFAULT_PAGE_URL = "http://www.4399.com/";

        /** File the collected links are written to when no path is given on the command line. */
        private static final String DEFAULT_OUTPUT_FILE = "e:/url.txt";

        /**
         * Matches {@code http://}, then one or more of word characters, {@code +},
         * {@code .}, {@code ?} or {@code /}, and finally a dot followed by ASCII letters.
         * Backslashes are doubled because this is a Java string literal; a single
         * {@code \w} or {@code \.} is an illegal escape and does not compile.
         * Compiled once because {@link Pattern} instances are immutable and reusable.
         */
        private static final Pattern LINK_PATTERN =
                Pattern.compile("http://[\\w+\\.?/?]+\\.[A-Za-z]+");

        /**
         * Fetches the page and writes every link found in it to the output file.
         *
         * @param args optional: {@code args[0]} overrides the page URL,
         *             {@code args[1]} overrides the output file path
         */
        public static void main(String[] args) {
            String pageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE_URL;
            String outputFile = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_FILE;

            try {
                URLConnection connection = new URL(pageUrl).openConnection();

                // try-with-resources closes both streams even when opening or reading
                // fails part-way, and never dereferences a resource that was not opened.
                // The network stream is opened first so a failed connection does not
                // create or truncate the output file.
                try (BufferedReader reader = new BufferedReader(
                             new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));
                     PrintWriter writer = new PrintWriter(new FileWriter(outputFile), true)) {

                    String line;
                    while ((line = reader.readLine()) != null) {
                        for (String link : extractLinks(line)) {
                            writer.println(link);
                        }
                    }

                    // PrintWriter never throws on write failures; it only records them.
                    if (writer.checkError()) {
                        throw new IOException("Failed to write links to " + outputFile);
                    }
                }

                System.out.println("获取成功!");
            } catch (MalformedURLException e) {
                System.err.println("Invalid page URL: " + pageUrl);
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("Could not collect links from " + pageUrl);
                e.printStackTrace();
            }
        }

        /**
         * Returns every substring of {@code text} that matches the link pattern,
         * in order of appearance.
         *
         * @param text one line (or any chunk) of page source; may be {@code null}
         * @return the matched links, empty when {@code text} is {@code null} or has no match
         */
        static List<String> extractLinks(String text) {
            List<String> links = new ArrayList<>();
            if (text == null) {
                return links;
            }
            Matcher matcher = LINK_PATTERN.matcher(text);
            while (matcher.find()) {
                links.add(matcher.group());
            }
            return links;
        }
    }

    http://www.cnblogs.com/huangwentian/p/6484534.html

  • 相关阅读:
    Visual Studio Code 工具使用教程
    JS获取节点的兄弟,父级,子级元素
    动态加载js
    实用的文本和图片无缝滚动效果
    兼容各种浏览器的文字循环无缝滚动效果
    中文乱码
    js 循环json
    js中获取basePath
    json转换为map
    java des 加密/解密
  • 原文地址:https://www.cnblogs.com/XJJD/p/7070514.html
Copyright © 2011-2022 走看看