zoukankan      html  css  js  c++  java
  • 简单的网络爬虫程序(Web Crawlers)

           程序比较简单,但是能体现基本原理。

    package com.wxisme.webcrawlers;
    
    import java.io.*;
    import java.net.*;
    
    /**
     * Minimal web crawler demo: downloads a single page and stores its text in a
     * local file, line by line.
     *
     * <p>The page is decoded as UTF-8 and the file is written as UTF-8 as well, so
     * the saved content does not depend on the platform default charset.
     *
     * @author wxisme
     */
    public class WebCrawlers {

        /** Address of the page downloaded by {@link #main(String[])}. */
        private static final String SOURCE_URL = "http://www.baidu.com";

        /**
         * File the page is saved to. The backslash is escaped on purpose: a single
         * backslash followed by 'b' is the backspace escape in a Java string
         * literal, which would silently produce a different file name.
         */
        private static final String TARGET_FILE = "E:\\baidu.txt";

        /** Charset used both to decode the page and to encode the saved file. */
        private static final java.nio.charset.Charset UTF_8 =
                java.nio.charset.Charset.forName("UTF-8");

        /**
         * Downloads {@code SOURCE_URL} and copies it line by line into
         * {@code TARGET_FILE}.
         *
         * <p>Every failure is reported on the console and ends the run; the method
         * never continues with a stream that could not be opened. All streams that
         * were opened are closed before the method returns.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            URL url;
            try {
                url = new URL(SOURCE_URL);
            } catch (MalformedURLException e) {
                System.out.println("域名不合法!");
                e.printStackTrace();
                return; // nothing to download without a valid URL
            }

            InputStream is = null;
            FileOutputStream fos = null;
            BufferedReader br = null;
            BufferedWriter bw = null;
            try {
                try {
                    is = url.openStream();
                } catch (IOException e) {
                    e.printStackTrace();
                    return; // the finally block below still runs
                }

                try {
                    fos = new FileOutputStream(TARGET_FILE);
                } catch (FileNotFoundException e) {
                    System.out.println("文件创建失败!");
                    e.printStackTrace();
                    return; // the finally block closes the network stream
                }

                // Wrap the byte streams in character streams with an explicit charset.
                br = new BufferedReader(new InputStreamReader(is, UTF_8));
                bw = new BufferedWriter(new OutputStreamWriter(fos, UTF_8));

                String line;
                while ((line = br.readLine()) != null) {
                    bw.write(line);
                    bw.newLine();
                }
                bw.flush();
            } catch (IOException e) {
                System.out.println("文件操作失败!");
                e.printStackTrace();
            } finally {
                // Wrappers first, so buffered output is flushed before the underlying
                // streams are closed; closing the raw streams again afterwards is
                // harmless and covers the case where a wrapper was never created.
                closeAll(bw, br, fos, is);
            }
        }

        /**
         * Closes every given resource in argument order.
         *
         * <p>{@code null} entries (and a {@code null} array) are ignored. A failure
         * to close one resource is reported on the console and does not prevent the
         * remaining resources from being closed.
         *
         * @param io resources to close; may be {@code null} or contain {@code null}
         */
        public static void closeAll(Closeable ... io) {
            if (io == null) {
                return;
            }
            for (Closeable temp : io) {
                if (temp == null) {
                    continue;
                }
                try {
                    temp.close();
                } catch (IOException e) {
                    System.out.println("文件关闭失败!");
                    e.printStackTrace();
                }
            }
        }

    }
  • 相关阅读:
    截除数值,分别获取数据与单位
    angularjs判断对象值是否存在
    angularjs文件上传实例
    获取Zxing.net 中所有barcodeformat
    创建QR CODE
    QR Code的容错级别
    验证QR Code版本
    Web API返回自定义数据给客户端
    理解Hive 不同组件的功能
    tcpdf中文解决方案
  • 原文地址:https://www.cnblogs.com/wxisme/p/4385513.html
Copyright © 2011-2022 走看看