zoukankan      html  css  js  c++  java
  • Java下载文件 爬虫 超时处理解决方案

    import java.util.List;
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.SocketTimeoutException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Runs network work on a worker thread and gives up on it after a fixed
     * timeout, retrying a limited number of times.
     *
     * <p>Each attempt starts a fresh worker thread and waits for it with
     * {@link Thread#join(long)}. If the worker is still alive when the wait ends it
     * is interrupted and a new attempt is started.
     */
    public class Main {

     /** Delay in milliseconds that {@code GetHtmlThread} sleeps before it opens its connection. */
     public static final int sleepMsPerConnection = 1000;
     /** Maximum time in milliseconds to wait for one worker thread to finish. */
     public static final int timeOutMs = 20000;
     /** Number of additional attempts made after the first attempt times out. */
     public static final int retry = 2;

     /**
      * Downloads {@code urlStr} into the file {@code filePath}, retrying on timeout.
      *
      * @param urlStr   URL of the resource to download
      * @param filePath path of the file to write
      * @throws RuntimeException if every attempt timed out, or if the calling
      *                          thread was interrupted while waiting
      */
     private static void download(String urlStr, String filePath) {
      int retryCount = 0;
      while (true) {
       DownloadThread thread = new DownloadThread(urlStr, filePath);
       thread.start();
       awaitWorker(thread);
       if (!thread.isAlive()) {
        return;
       }
       // The original author measured that interrupt() does not actually end the
       // worker here (see the article on how to interrupt Java threads), so this
       // is a best-effort request only.
       thread.interrupt();
       retryCount = nextRetryOrFail(retryCount);
      }
     }


     /**
      * Fetches the text at {@code urlStr}, retrying on timeout.
      *
      * @param urlStr URL of the page to fetch
      * @return the value of the worker's {@code html} field once the worker has
      *         finished
      * @throws RuntimeException if every attempt timed out, or if the calling
      *                          thread was interrupted while waiting
      */
     private static String getHtml(String urlStr) {
      int retryCount = 0;
      while (true) {
       GetHtmlThread thread = new GetHtmlThread(urlStr);
       thread.start();
       awaitWorker(thread);
       if (!thread.isAlive()) {
        return thread.html;
       }
       thread.interrupt();
       retryCount = nextRetryOrFail(retryCount);
      }
     }

     /**
      * Waits up to {@link #timeOutMs} milliseconds for {@code worker} to finish.
      *
      * <p>If the calling thread is interrupted while waiting, the worker is asked
      * to stop, the caller's interrupt status is restored and the wait is aborted
      * instead of being counted as a timeout.
      */
     private static void awaitWorker(Thread worker) {
      try {
       worker.join(timeOutMs);
      } catch (InterruptedException e) {
       worker.interrupt();
       // Restore the flag so code further up the stack can see the interruption.
       Thread.currentThread().interrupt();
       throw new RuntimeException("interrupted while waiting for worker thread", e);
      }
     }

     /**
      * Records one timed-out attempt.
      *
      * @param retryCount number of retries already performed
      * @return the incremented retry count
      * @throws RuntimeException if more than {@link #retry} retries would be needed
      */
     private static int nextRetryOrFail(int retryCount) {
      int next = retryCount + 1;
      if (next > retry) {
       // At this point exactly `retry` retries have been made after the first attempt.
       throw new RuntimeException("still timeout after retry " + retry + " times");
      }
      System.out.println("retry");
      return next;
     }
    }
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    
    /**
     * Worker thread that reads the text behind a URL into {@link #html}.
     *
     * <p>The thread first sleeps for {@code Main.sleepMsPerConnection} milliseconds,
     * then reads the stream line by line. Any failure other than an interruption is
     * printed and terminates the JVM with exit status 1.
     */
    public class GetHtmlThread extends Thread {

     /**
      * The fetched text, with every line terminated by {@code '\n'}. Stays
      * {@code null} if the thread was interrupted before the read completed.
      */
     public String html;
     private final String urlStr;

     /**
      * @param urlStr URL of the page to fetch
      */
     public GetHtmlThread(String urlStr) {
      this.urlStr = urlStr;
     }

     @Override
     public void run() {
      try {
       Thread.sleep(Main.sleepMsPerConnection);
       URL url = new URL(urlStr);
       StringBuilder sb = new StringBuilder();
       BufferedReader br = new BufferedReader(new InputStreamReader(url
         .openStream()));
       try {
        String line = null;
        while ((line = br.readLine()) != null) {
         // A blocking read is not woken by interrupt(), so honour the request
         // between lines instead of reading on for nobody.
         if (Thread.currentThread().isInterrupted()) {
          return;
         }
         sb.append(line);
         sb.append('\n');
        }
       } finally {
        // Close the reader (and the underlying connection) on failure too.
        br.close();
       }
       this.html = sb.toString();
      } catch (InterruptedException e) {
       // Interrupted during the initial sleep: nothing has been opened yet, so
       // simply let the thread end with html left unset.
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }
    }
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    
    /**
     * Worker thread that copies the content behind a URL into a local file.
     *
     * <p>Any failure is printed and terminates the JVM with exit status 1. If the
     * thread is interrupted while copying, it stops at the next buffer boundary and
     * leaves the partially written file in place.
     */
    public class DownloadThread extends Thread {

     private final String urlStr;
     private final String filePath;

     /**
      * @param urlStr   URL of the resource to download
      * @param filePath path of the file to create or overwrite
      */
     public DownloadThread(String urlStr, String filePath) {
      this.urlStr = urlStr;
      this.filePath = filePath;
     }

     @Override
     public void run() {
      try {
       URL url = new URL(urlStr);
       InputStream is = url.openStream();
       try {
        File pdfFile = new File(filePath);
        OutputStream os = new FileOutputStream(pdfFile);
        try {
         copyStream(is, os);
        } finally {
         // Release the file handle even when the copy fails.
         os.close();
        }
       } finally {
        // Release the connection even when the file cannot be opened or written.
        is.close();
       }
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }

     /**
      * Copies bytes from {@code inputStream} to {@code outputStream} until the
      * input is exhausted or the current thread is interrupted, then flushes the
      * output. The caller still needs to close both streams afterwards.
      *
      * @param inputStream  stream to read from
      * @param outputStream stream to write to
      * @throws IOException if reading, writing or flushing fails
      */
     private void copyStream(InputStream inputStream, OutputStream outputStream)
       throws IOException {
      byte[] b = new byte[1024];
      int len;
      // interrupt() does not wake a blocking read, so check the flag between
      // buffers; otherwise an abandoned worker keeps writing to the same file
      // that a later attempt is writing.
      while (!Thread.currentThread().isInterrupted()
        && (len = inputStream.read(b)) > 0) {
       outputStream.write(b, 0, len);
      }
      outputStream.flush();
     }
    }
  • 相关阅读:
    java 反射 报错:Attempt to get java.lang.Integer field "..." with illegal data type conversion to int
    经常报错:Communications link failure
    解析Excel
    spring+atomikos+mybatis 多数据源事务(动态切换)
    mysql 存储过程
    Ace Admin 学习笔记
    spring mvc 表单提交 乱码
    spring 事务
    基于注解的Spring多数据源配置和使用(非事务)
    javaEE版本的eclipse中导入工程,发现server里面找不到工程,根本发布不了也不能运行
  • 原文地址:https://www.cnblogs.com/simonshi/p/2308497.html
Copyright © 2011-2022 走看看