zoukankan      html  css  js  c++  java
  • Java下载文件 爬虫 超时处理解决方案

    import java.util.List;
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.SocketTimeoutException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Runs downloads and page fetches on worker threads, waiting at most
     * {@link #timeOutMs} milliseconds per attempt and starting a fresh worker
     * when an attempt does not finish in time.
     */
    public class Main {

        /** Delay, in milliseconds, that GetHtmlThread sleeps before opening its connection. */
        public static final int sleepMsPerConnection = 1000;

        /** Maximum time, in milliseconds, to wait for a single attempt to finish. */
        public static final int timeOutMs = 20000;

        /** Number of additional attempts made after the first attempt times out. */
        public static final int retry = 2;

        /**
         * Downloads {@code urlStr} into the file at {@code filePath}, retrying on timeout.
         *
         * @param urlStr   URL to download from
         * @param filePath path of the file to write
         * @throws RuntimeException if every attempt times out, or if the calling
         *                          thread is interrupted while waiting
         */
        private static void download(String urlStr, String filePath) {
            for (int attempt = 0; attempt <= retry; attempt++) {
                if (attempt > 0) {
                    System.out.println("retry");
                }
                if (finishedInTime(new DownloadThread(urlStr, filePath))) {
                    return;
                }
            }
            throw stillTimedOut();
        }

        /**
         * Fetches the text at {@code urlStr}, retrying on timeout.
         *
         * @param urlStr URL to fetch
         * @return the {@code html} field of the first worker that finished in time
         * @throws RuntimeException if every attempt times out, or if the calling
         *                          thread is interrupted while waiting
         */
        private static String getHtml(String urlStr) {
            for (int attempt = 0; attempt <= retry; attempt++) {
                if (attempt > 0) {
                    System.out.println("retry");
                }
                GetHtmlThread worker = new GetHtmlThread(urlStr);
                if (finishedInTime(worker)) {
                    return worker.html;
                }
            }
            throw stillTimedOut();
        }

        /**
         * Starts {@code worker} and waits up to {@link #timeOutMs} milliseconds for it.
         *
         * @param worker a thread that has not been started yet
         * @return {@code true} if the worker terminated within the time limit,
         *         {@code false} if it was still running and has been interrupted
         * @throws RuntimeException if the calling thread is interrupted while waiting
         */
        private static boolean finishedInTime(Thread worker) {
            worker.start();
            try {
                worker.join(timeOutMs);
            } catch (InterruptedException e) {
                // The caller was asked to stop: pass the request on to the worker,
                // keep the interrupt flag set, and abort instead of retrying.
                worker.interrupt();
                Thread.currentThread().interrupt();
                throw new RuntimeException("interrupted while waiting for " + worker.getName(), e);
            }
            if (!worker.isAlive()) {
                return true;
            }
            // In testing, interrupt() did not actually terminate a worker that is
            // blocked in I/O (see articles on how to interrupt Java threads), so
            // the timed-out worker may keep running in the background.
            worker.interrupt();
            return false;
        }

        /** Builds the exception thrown once the initial attempt and all retries have timed out. */
        private static RuntimeException stillTimedOut() {
            return new RuntimeException("still timeout after retry " + retry + " times");
        }
    }
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    
    /**
     * Worker thread that fetches the text at a URL and publishes it in {@link #html}.
     */
    public class GetHtmlThread extends Thread {

        /**
         * The fetched text, every line terminated by {@code '\n'}. Remains
         * {@code null} unless the fetch runs to completion.
         */
        public String html;
        private final String urlStr;

        /**
         * @param urlStr URL to fetch when the thread runs
         */
        public GetHtmlThread(String urlStr) {
            this.urlStr = urlStr;
        }

        /**
         * Sleeps for {@code Main.sleepMsPerConnection} milliseconds, then reads the
         * URL line by line into {@link #html}. If the thread is interrupted during
         * the sleep or between lines, the fetch is abandoned and {@link #html} is
         * left unset. Any other failure prints the stack trace and terminates the
         * JVM with exit status 1.
         */
        @Override
        public void run() {
            try {
                Thread.sleep(Main.sleepMsPerConnection);
                URL url = new URL(urlStr);
                StringBuilder text = new StringBuilder();
                // NOTE(review): InputStreamReader without a charset decodes with the
                // platform default; confirm that matches the pages being fetched.
                try (BufferedReader reader =
                        new BufferedReader(new InputStreamReader(url.openStream()))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (Thread.currentThread().isInterrupted()) {
                            // Whoever interrupted us no longer wants the result;
                            // stop reading rather than run on in the background.
                            return;
                        }
                        text.append(line).append('\n');
                    }
                }
                this.html = text.toString();
            } catch (InterruptedException e) {
                // Interrupted during the initial sleep: keep the flag set and end quietly.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    
    /**
     * Worker thread that copies the content of a URL into a local file.
     */
    public class DownloadThread extends Thread {

        private final String urlStr;
        private final String filePath;

        /**
         * @param urlStr   URL to read from when the thread runs
         * @param filePath path of the file to (over)write
         */
        public DownloadThread(String urlStr, String filePath) {
            this.urlStr = urlStr;
            this.filePath = filePath;
        }

        /**
         * Opens the URL, then the target file, and copies the stream across. Both
         * streams are closed on every exit path. Any failure prints the stack
         * trace and terminates the JVM with exit status 1.
         */
        @Override
        public void run() {
            try {
                URL url = new URL(urlStr);
                try (InputStream in = url.openStream();
                        OutputStream out = new FileOutputStream(new File(filePath))) {
                    copyStream(in, out);
                }
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }

        /**
         * Copies bytes from {@code inputStream} to {@code outputStream} until end of
         * stream, then flushes the output. The copy stops early, leaving a partial
         * result, if the current thread's interrupt flag is set, so an abandoned
         * worker does not keep writing to the file.
         *
         * <p>The caller still needs to close both streams after calling this method.
         *
         * @param inputStream  source of the bytes
         * @param outputStream destination of the bytes
         * @throws IOException if reading, writing or flushing fails
         */
        private void copyStream(InputStream inputStream, OutputStream outputStream)
                throws IOException {
            byte[] buffer = new byte[1024];
            int len;
            // read() signals end of stream with -1.
            while (!Thread.currentThread().isInterrupted()
                    && (len = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, len);
            }
            outputStream.flush();
        }
    }
  • 相关阅读:
    异常处理
    反射4中内置函数
    property装饰器与继承
    封装
    面向对象编程
    面向对象编程
    项目开发规范
    logging模块
    22. 一个题来探查对 字符串,指针,数组三方面的关联使用方面的概念是否清晰,分析下面三个printf打印什么?
    21. 让指针数组结尾带NULL,使遍历时不依靠计算整个数组大小就可以在结尾遍历结束
  • 原文地址:https://www.cnblogs.com/simonshi/p/2308497.html
Copyright © 2011-2022 走看看