zoukankan      html  css  js  c++  java
  • 沈阳市公交数据爬虫

    by http://micely.net/sourceCodeDetail/39

    沈阳市公交数据爬虫

    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * Crawler for Shenyang city bus-station data.
     *
     * <p>Downloads the alphabetical station index pages from bus.mapbar.com,
     * extracts the station names and converts them into MySQL {@code INSERT}
     * statements, which {@link #main(String[])} writes to a file.
     *
     * @author Administrator
     */
    public class Test {
        /** CSS selector matching the station links on an index page. */
        private static final String STATION_SELECTOR = ".ChinaTxt dd a";

        /** Suffixes of the index pages that are crawled (one page per suffix). */
        private static final String[] INDEX_KEYS = { "A", "B", "C", "D", "E", "F",
                "G", "H", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
                "W", "X", "Y", "Z", "1", "2", "5" };

        /** Output file used when no path is given on the command line. */
        private static final String DEFAULT_OUTPUT_PATH = "d:\\a.sql";

        /**
         * Encoding of the generated SQL file. Fixed explicitly so the (Chinese)
         * station names do not depend on the platform default charset.
         */
        private static final String OUTPUT_CHARSET = "UTF-8";

        /**
         * Every name collected so far by this instance. {@link #grab} appends
         * to it and nothing clears it, so repeated calls keep accumulating.
         */
        List<String> l = new ArrayList<String>();

        /**
         * Fetches one page and appends the name of every element matching
         * {@code feature} to the shared result list. The element's
         * {@code title} attribute is used as the name; when it is empty the
         * element's text is used instead.
         *
         * <p>A page that cannot be fetched is reported on {@code System.err}
         * and skipped, so the crawl continues with the remaining pages.
         *
         * @param url     address of the page to fetch
         * @param feature CSS selector of the elements that carry the names
         * @return the shared list holding all names collected so far by this
         *         instance (not only those from this call)
         */
        public List<String> grab(String url, String feature) {
            try {
                Document doc = Jsoup.connect(url).get();
                Elements elements = doc.select(feature);
                for (Element element : elements) {
                    String title = element.attr("title");
                    l.add("".equals(title) ? element.text() : title);
                }
            } catch (IOException e) {
                // Best effort: name the page that failed, then carry on.
                System.err.println("Failed to fetch " + url);
                e.printStackTrace();
            }
            return l;
        }

        /**
         * Crawls every station index page and collects the station names.
         *
         * @return the shared list holding all names collected so far by this
         *         instance
         */
        public List<String> grabAll() {
            for (String key : INDEX_KEYS) {
                this.grab("http://bus.mapbar.com/shenyang/station_list/" + key
                        + ".shtml", STATION_SELECTOR);
            }
            return l;
        }

        /**
         * Crawls all station names and converts each one into a MySQL
         * {@code INSERT} statement for table {@code tab_bussite}.
         *
         * @return one statement per collected name, in collection order
         */
        public List<String> convertToMysql() {
            List<String> names = this.grabAll();
            List<String> statements = new ArrayList<String>(names.size());
            for (String name : names) {
                statements.add("insert into tab_bussite(name)values(\""
                        + escapeSqlString(name) + "\");");
            }
            return statements;
        }

        /**
         * Escapes a value for use inside a double-quoted MySQL string literal
         * (default SQL mode, where backslash is the escape character).
         * Backslashes are doubled first so that the backslashes added for the
         * quotes are not escaped a second time.
         */
        private static String escapeSqlString(String value) {
            return value.replace("\\", "\\\\").replace("\"", "\\\"");
        }

        /**
         * Runs the crawl and writes the generated statements, one per line, to
         * a file.
         *
         * @param args optional; {@code args[0]} is the output file path
         *             (defaults to {@code d:\a.sql})
         */
        public static void main(String[] args) {
            String outputPath = (args != null && args.length > 0)
                    ? args[0] : DEFAULT_OUTPUT_PATH;

            Test grab = new Test();
            List<String> insertSqls = grab.convertToMysql();

            // Assemble the whole script first so the file is written in one go.
            StringBuilder script = new StringBuilder();
            for (String sql : insertSqls) {
                script.append(sql).append("\n");
            }

            FileOutputStream fos = null;
            try {
                fos = new FileOutputStream(new File(outputPath));
                fos.write(script.toString().getBytes(OUTPUT_CHARSET));
            } catch (FileNotFoundException e) {
                System.err.println("Cannot open output file " + outputPath);
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("Failed to write output file " + outputPath);
                e.printStackTrace();
            } finally {
                if (fos != null) {
                    try {
                        fos.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    --

  • 相关阅读:
    /etc/sysctl.conf 控制内核相关配置文件
    python 并发编程 非阻塞IO模型
    python 并发编程 多路复用IO模型
    python 并发编程 异步IO模型
    python 并发编程 阻塞IO模型
    python 并发编程 基于gevent模块 协程池 实现并发的套接字通信
    python 并发编程 基于gevent模块实现并发的套接字通信
    python 并发编程 io模型 目录
    python 并发编程 socket 服务端 客户端 阻塞io行为
    python 并发编程 IO模型介绍
  • 原文地址:https://www.cnblogs.com/syc001/p/2610458.html
Copyright © 2011-2022 走看看