zoukankan      html  css  js  c++  java
  • 简单的网络爬虫程序(Web Crawlers)

           程序比较简单,但是能体现基本原理。

    package com.wxisme.webcrawlers;
    
    import java.io.*;
    import java.net.*;
    
    /**
     * A minimal web crawler: downloads a single page and saves its text to a local file.
     *
     * <p>The page is decoded as UTF-8, copied line by line, and written back out as UTF-8.
     * Every stage (URL parsing, opening the page, creating the file, copying) reports its own
     * failure and stops, so a failed stage never leads to a {@code NullPointerException} later.
     *
     * @author wxisme
     */
    public class WebCrawlers {

        /** Page fetched when no URL is given on the command line. */
        private static final String DEFAULT_URL = "http://www.baidu.com";

        /** File written when no output path is given (note the escaped backslash). */
        private static final String DEFAULT_OUTPUT = "E:\\baidu.txt";

        /** Charset name used both to decode the page and to encode the output file. */
        private static final String CHARSET = "utf-8";

        /**
         * Fetches a page and stores it in a file.
         *
         * @param args optional; {@code args[0]} is the URL to fetch (default
         *             {@code http://www.baidu.com}) and {@code args[1]} is the output file
         *             path (default {@code E:\baidu.txt})
         */
        public static void main(String[] args) {
            String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
            String target = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

            URL url;
            try {
                url = new URL(address);
            } catch (MalformedURLException e) {
                System.out.println("域名不合法!");
                e.printStackTrace();
                return;
            }

            InputStream is;
            try {
                is = url.openStream();
            } catch (IOException e) {
                e.printStackTrace();
                return;
            }

            FileOutputStream fos;
            try {
                fos = new FileOutputStream(target);
            } catch (FileNotFoundException e) {
                System.out.println("文件创建失败!");
                e.printStackTrace();
                closeAll(is);
                return;
            }

            // Conversion streams pin the charset on both sides so that non-ASCII text
            // survives the round trip regardless of the platform default encoding.
            BufferedReader br;
            BufferedWriter bw;
            try {
                br = new BufferedReader(new InputStreamReader(is, CHARSET));
                bw = new BufferedWriter(new OutputStreamWriter(fos, CHARSET));
            } catch (UnsupportedEncodingException e) {
                System.out.println("字符集设置失败!");
                e.printStackTrace();
                closeAll(fos, is);
                return;
            }

            try {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line);
                    bw.newLine();
                }
            } catch (IOException e) {
                System.out.println("文件操作失败!");
                e.printStackTrace();
            } finally {
                // Wrappers first: closing the BufferedWriter flushes whatever is still
                // buffered before the underlying file stream goes away. Closing the raw
                // streams again afterwards is a harmless no-op.
                closeAll(bw, br, fos, is);
            }
        }

        /**
         * Closes every given resource in order, skipping {@code null} entries.
         *
         * <p>A failure to close one resource is reported and does not prevent the remaining
         * resources from being closed.
         *
         * @param io resources to close; the array itself and any element may be {@code null}
         */
        public static void closeAll(Closeable ... io) {
            if (io == null) {
                return;
            }
            for (Closeable resource : io) {
                if (resource == null) {
                    continue;
                }
                try {
                    resource.close();
                } catch (IOException e) {
                    System.out.println("文件关闭失败!");
                    e.printStackTrace();
                }
            }
        }

    }
  • 相关阅读:
    洛谷 P1068 分数线划定
    LeetCode 7. Reverse Integer
    LeetCode 504. Base 7
    洛谷 P1598 垂直柱状图
    用户场景
    个人博客03
    个人博客02
    个人博客01
    《构建之法》阅读笔记03
    学习进度条(第四周)
  • 原文地址:https://www.cnblogs.com/wxisme/p/4385513.html
Copyright © 2011-2022 走看看