zoukankan      html  css  js  c++  java
  • 简单的网络爬虫程序(Web Crawlers)

           程序比较简单,但是能体现基本原理。

    package com.wxisme.webcrawlers;
    
    import java.io.*;
    import java.net.*;
    
    /**
     * Minimal web crawler demo: downloads one page and saves its text to a local file.
     *
     * <p>The page is decoded as UTF-8 line by line and written back out as UTF-8,
     * with each line terminated by the platform line separator.
     *
     * @author wxisme
     */
    public class WebCrawlers {

        /** Address of the page that is downloaded. */
        private static final String PAGE_URL = "http://www.baidu.com";

        /** Destination file; the backslash must be escaped, otherwise "\b" is a backspace. */
        private static final String OUTPUT_PATH = "E:\\baidu.txt";

        /** Charset used both for decoding the page and for encoding the output file. */
        private static final String CHARSET = "utf-8";

        /**
         * Downloads {@link #PAGE_URL} and writes its content to {@link #OUTPUT_PATH}.
         *
         * <p>Every failure is reported on the console and ends the run; no later step is
         * attempted with a resource that could not be created. All resources that were
         * opened are closed before the method returns.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            URL url;
            try {
                url = new URL(PAGE_URL);
            } catch (MalformedURLException e) {
                System.out.println("域名不合法!");
                e.printStackTrace();
                return; // nothing to download without a valid URL
            }

            InputStream is = null;
            FileOutputStream fos = null;
            BufferedReader br = null;
            BufferedWriter bw = null;
            try {
                try {
                    is = url.openStream();
                } catch (IOException e) {
                    e.printStackTrace();
                    return;
                }

                try {
                    fos = new FileOutputStream(OUTPUT_PATH);
                } catch (FileNotFoundException e) {
                    System.out.println("文件创建失败!");
                    e.printStackTrace();
                    return;
                }

                // Use converting streams so the charset is explicit on both sides.
                try {
                    br = new BufferedReader(new InputStreamReader(is, CHARSET));
                    bw = new BufferedWriter(new OutputStreamWriter(fos, CHARSET));
                } catch (UnsupportedEncodingException e) {
                    System.out.println("字符集设置失败!");
                    e.printStackTrace();
                    return;
                }

                try {
                    String line;
                    while ((line = br.readLine()) != null) {
                        bw.write(line);
                        bw.newLine();
                    }
                    bw.flush();
                } catch (IOException e) {
                    System.out.println("文件操作失败!");
                    e.printStackTrace();
                }
            } finally {
                // Wrappers first: closing the writer flushes any buffered text into the
                // file stream before that stream itself is closed.
                closeAll(bw, br, fos, is);
            }
        }

        /**
         * Closes every given resource in order, skipping {@code null} entries.
         *
         * <p>A failure to close one resource is reported on the console and does not
         * prevent the remaining resources from being closed.
         *
         * @param io resources to close; may be {@code null}, empty, or contain {@code null}
         */
        public static void closeAll(Closeable ... io) {
            if (io == null) {
                return;
            }
            for (Closeable resource : io) {
                if (resource == null) {
                    continue;
                }
                try {
                    resource.close();
                } catch (IOException e) {
                    System.out.println("文件关闭失败!");
                    e.printStackTrace();
                }
            }
        }

    }
  • 相关阅读:
    (hdu 7.1.8)Quoit Design(最低点——在n一个点,发现两点之间的最小距离)
    [Windows]_[0基础]_[使用命令行工具dumpbin分析文件]
    《走开》反馈
    二分基础
    日历的问题C语言,C++(boost),python,Javascript,Java和Matlab实现
    Unity3D 游戏开发架构篇 ——性格一流的设计和持久性
    2015第54周四
    2015第54周三
    2015第54周二
    2015第54周一
  • 原文地址:https://www.cnblogs.com/wxisme/p/4385513.html
Copyright © 2011-2022 走看看