zoukankan      html  css  js  c++  java
  • 2016

    package Demo;

    import java.io.IOException;
    import java.net.URL;
    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Deque;
    import java.util.HashSet;
    import java.util.Scanner;
    import java.util.Set;

    /**
     * Minimal breadth-first web crawler.
     *
     * <p>Starting from one URL, it downloads each page, prints the page's URL
     * to standard output, and queues every double-quoted {@code "http:..."}
     * string found in the page text for a later visit. Only links that begin
     * with {@code http:} are recognised.
     */
    public class WebCrawler {

        /**
         * The crawl keeps going while the number of visited pages is at most
         * this value, so one run visits no more than {@code VISIT_THRESHOLD + 1}
         * pages.
         */
        private static final int VISIT_THRESHOLD = 10;

        /** Opening double quote followed by the scheme that marks a link. */
        private static final String LINK_PREFIX = "\"http:";

        /**
         * Prompts for a start URL on standard input and crawls from it.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            try (Scanner in = new Scanner(System.in)) {
                System.out.println("Please enter a url");
                if (!in.hasNextLine()) {
                    return; // standard input was closed before a URL was entered
                }
                crawler(in.nextLine().trim());
            }
        }

        /**
         * Crawls breadth-first from {@code url}, printing each distinct URL the
         * first time it is visited.
         *
         * <p>Every URL is downloaded at most once. The crawl ends when no
         * pending URLs remain or when more than {@link #VISIT_THRESHOLD} pages
         * have been visited.
         *
         * @param url the start URL; {@code null} or an empty string does nothing
         */
        public static void crawler(String url) {
            if (url == null || url.isEmpty()) {
                return;
            }

            Deque<String> pending = new ArrayDeque<>();
            Set<String> visited = new HashSet<>();
            pending.add(url);

            while (!pending.isEmpty() && visited.size() <= VISIT_THRESHOLD) {
                String next = pending.remove();
                if (!visited.add(next)) {
                    // Already crawled: skip it entirely instead of downloading
                    // and scanning the same page a second time.
                    continue;
                }
                System.out.println(next);

                for (String link : getSubstring(next)) {
                    if (!visited.contains(link)) {
                        pending.add(link);
                    }
                }
            }
        }

        /**
         * Downloads the page at {@code url} and collects every double-quoted
         * string that starts with {@code http:}.
         *
         * <p>The page is read line by line as UTF-8, so a link must open and
         * close on the same line to be found. Failures (malformed URL, I/O
         * error, runtime exception while opening or reading) are reported on
         * standard error, and whatever was collected so far is returned.
         *
         * @param url the address of the page to scan
         * @return the links in order of appearance, without the surrounding
         *         quotes; never {@code null}
         */
        public static ArrayList<String> getSubstring(String url) {
            ArrayList<String> links = new ArrayList<>();
            // try-with-resources closes the scanner and the URL stream beneath it.
            try (Scanner input = new Scanner(new URL(url).openStream(), "UTF-8")) {
                while (input.hasNextLine()) {
                    String line = input.nextLine();
                    int start = line.indexOf(LINK_PREFIX);
                    // indexOf returns -1 when absent, so 0 (a link at the very
                    // start of the line) is a valid hit.
                    while (start >= 0) {
                        int end = line.indexOf('"', start + 1);
                        if (end < 0) {
                            break; // no closing quote on this line
                        }
                        links.add(line.substring(start + 1, end));
                        // Resume after the closing quote so that it is not
                        // mistaken for the opening quote of another link.
                        start = line.indexOf(LINK_PREFIX, end + 1);
                    }
                }
            } catch (IOException | RuntimeException ex) {
                // Best effort: an unreachable or malformed link must not abort
                // the crawl.
                System.err.println("Could not read " + url + ": " + ex);
            }
            return links;
        }

    }

  • 相关阅读:
    XML解析技术研究(一)
    Qt解析XML文件(QXmlStreamReader)
    Qt XML读取写入操作
    QT QXmlStreamWriter用法小结
    QtXML 举例
    libpcap使用
    PCAP研究
    粗谈pcap_next_ex()
    C#扇形的绘制与Hittest交互、图种制作
    ORA-01747: user.table.column, table.column 或列说明无效
  • 原文地址:https://www.cnblogs.com/laigaoxiaode/p/5562410.html
Copyright © 2011-2022 走看看