zoukankan      html  css  js  c++  java
  • Java下载文件 爬虫 超时处理解决方案

    import java.util.List;
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.SocketTimeoutException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Entry points that fetch a URL on a worker thread and give up waiting after a
     * fixed timeout, retrying a bounded number of times.
     */
    public class Main {
    
     /** Delay in milliseconds that GetHtmlThread sleeps before opening its connection. */
     public static final int sleepMsPerConnection = 1000;
     /** Maximum time in milliseconds to wait for one worker thread to finish. */
     public static final int timeOutMs = 20000;
     /** Number of additional attempts made after the first attempt times out. */
     public static final int retry = 2;
    
     /**
      * Downloads {@code urlStr} into {@code filePath} using a DownloadThread,
      * waiting at most {@link #timeOutMs} per attempt.
      *
      * @param urlStr   URL to download
      * @param filePath path of the file the worker writes to
      * @throws RuntimeException if every attempt (the first one plus {@link #retry}
      *         retries) timed out, or if the calling thread was interrupted while waiting
      */
     private static void download(String urlStr, String filePath) {
      int retryCount = 0;
      while (true) {
       DownloadThread thread = new DownloadThread(urlStr, filePath);
       thread.start();
       try {
        thread.join(timeOutMs);
       } catch (InterruptedException e) {
        // The caller was asked to stop: pass the request on to the worker, keep the
        // interrupt flag set for code further up the stack, and do not start retries.
        thread.interrupt();
        Thread.currentThread().interrupt();
        throw new RuntimeException("interrupted while downloading " + urlStr, e);
       }
       if (!thread.isAlive()) {
        return;
       }
       // interrupt() is only a request; the worker stops early only if it checks its
       // interrupt status. A worker blocked in stream I/O may keep running.
       // NOTE(review): a worker that is still running shares filePath with the retry
       // worker started below, so their writes can interleave.
       thread.interrupt();
       retryCount++;
       if (retryCount > retry) {
        // At this point exactly `retry` retries have been performed.
        throw new RuntimeException("still timeout after retry " + retry + " times");
       }
       System.out.println("retry");
      }
     }
    
    
     /**
      * Fetches the content at {@code urlStr} as text using a GetHtmlThread,
      * waiting at most {@link #timeOutMs} per attempt.
      *
      * @param urlStr URL to fetch
      * @return the {@code html} field of the worker that finished in time
      * @throws RuntimeException if every attempt (the first one plus {@link #retry}
      *         retries) timed out, or if the calling thread was interrupted while waiting
      */
     private static String getHtml(String urlStr) {
      int retryCount = 0;
      while (true) {
       GetHtmlThread thread = new GetHtmlThread(urlStr);
       thread.start();
       try {
        thread.join(timeOutMs);
       } catch (InterruptedException e) {
        // Same policy as download(): propagate the stop request instead of retrying.
        thread.interrupt();
        Thread.currentThread().interrupt();
        throw new RuntimeException("interrupted while fetching " + urlStr, e);
       }
       if (!thread.isAlive()) {
        // join() returning after the worker ended makes its write to html visible here.
        return thread.html;
       }
       // Request cancellation of the timed-out worker before starting a new one.
       thread.interrupt();
       retryCount++;
       if (retryCount > retry) {
        // At this point exactly `retry` retries have been performed.
        throw new RuntimeException("still timeout after retry " + retry + " times");
       }
       System.out.println("retry");
      }
     }
    }
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    
    /**
     * Worker thread that reads the text content of a URL line by line and
     * publishes it through the {@link #html} field.
     */
    public class GetHtmlThread extends Thread {
    
     /**
      * The fetched text with every line terminated by '\n'. Stays {@code null}
      * when the thread was interrupted before the whole content was read.
      */
     public String html;
     private final String urlStr;
    
     /**
      * @param urlStr URL whose content is fetched when the thread runs
      */
     public GetHtmlThread(String urlStr) {
      this.urlStr = urlStr;
     }
    
     /**
      * Sleeps for {@code Main.sleepMsPerConnection} milliseconds, then reads the
      * URL into {@link #html}. An interrupt ends the run without setting
      * {@link #html}; any other failure prints the stack trace and terminates
      * the JVM with exit status 1.
      */
     @Override
     public void run() {
      try {
       // Presumably a throttle between connections; confirm against the caller.
       Thread.sleep(Main.sleepMsPerConnection);
       URL url = new URL(urlStr);
       StringBuilder sb = new StringBuilder();
       // NOTE(review): InputStreamReader without a charset decodes with the platform
       // default encoding; pages in another encoding will be garbled.
       try (BufferedReader br = new BufferedReader(new InputStreamReader(url
         .openStream()))) {
        String line;
        while ((line = br.readLine()) != null) {
         if (Thread.currentThread().isInterrupted()) {
          // The waiting caller has given up; stop reading and leave html unset.
          // A read that is currently blocked is not woken by the interrupt, so
          // this takes effect only once readLine() returns.
          return;
         }
         sb.append(line);
         sb.append('\n');
        }
       }
       this.html = sb.toString();
      } catch (InterruptedException e) {
       // Interrupted during the initial sleep: keep the flag set and end quietly.
       Thread.currentThread().interrupt();
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }
    }
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    
    /**
     * Worker thread that copies the content of a URL into a local file.
     */
    public class DownloadThread extends Thread {
    
     private final String urlStr;
     private final String filePath;
    
     /**
      * @param urlStr   URL to download
      * @param filePath path of the file to write; an existing file is overwritten
      */
     public DownloadThread(String urlStr, String filePath) {
      this.urlStr = urlStr;
      this.filePath = filePath;
     }
    
     /**
      * Opens the URL, then the output file, and copies the bytes across. Both
      * streams are closed on every path. Any failure prints the stack trace and
      * terminates the JVM with exit status 1.
      */
     @Override
     public void run() {
      try {
       URL url = new URL(urlStr);
       // The input is opened first so that no output file is created when the
       // URL cannot be opened.
       try (InputStream is = url.openStream();
         OutputStream os = new FileOutputStream(new File(filePath))) {
        copyStream(is, os);
       }
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }
     
     /**
      * Copies bytes from {@code inputStream} to {@code outputStream} until the
      * input is exhausted or the current thread has been interrupted. Neither
      * stream is closed here; the caller remains responsible for closing both.
      *
      * @param inputStream  source of the bytes
      * @param outputStream destination of the bytes
      * @throws IOException if reading or writing fails
      */
     private void copyStream(InputStream inputStream, OutputStream outputStream)
       throws IOException {
      byte[] b = new byte[1024];
      int len;
      // The interrupt status is checked before every read so that a caller which
      // gave up waiting can stop the copy. A read that is already blocked is not
      // woken by the interrupt, so the check takes effect between chunks only.
      while (!Thread.currentThread().isInterrupted()
        && (len = inputStream.read(b)) > 0) {
       outputStream.write(b, 0, len);
      }
      outputStream.flush();
     }
    }
  • 相关阅读:
    漂亮的自适应宽度的多色彩CSS图片按钮
    Qt中设置widget背景颜色/图片的注意事项(使用样式表 setStyleSheet())
    QT的父子Widget之间消息的传递(如果子类没有accept或ignore该事件,则该事件会被传递给其父亲——Qlabel与QPushButton的处理就不一样)
    QT内置的ICON资源
    QT事件过滤器(QT事件处理的5个层次:自己覆盖或过滤,父窗口过滤,Application过滤与通知)
    QMetaObject感觉跟Delphi的类之类有一拼,好好学一下
    POJ 1013 小水题 暴力模拟
    WMDestroy函数调用inherited,难道是为了调用子类覆盖函数?还有这样调用的?
    技术资深、还关注市场的几率较高
    有感,懂市场比懂产品重要,懂产品比懂技术重要——想起凡客诚品和YY语音了
  • 原文地址:https://www.cnblogs.com/simonshi/p/2308497.html
Copyright © 2011-2022 走看看