zoukankan      html  css  js  c++  java
  • Java下载文件 爬虫 超时处理解决方案

    import java.util.List;
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.MalformedURLException;
    import java.net.SocketTimeoutException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Helpers that run a blocking network operation on a worker thread so that
     * one attempt can be abandoned after {@link #timeOutMs} milliseconds and
     * then retried up to {@link #retry} more times.
     */
    public class Main {
    
     /** Milliseconds that GetHtmlThread sleeps before it opens its connection. */
     public static final int sleepMsPerConnection = 1000;
     /** Maximum number of milliseconds to wait for a single attempt to finish. */
     public static final int timeOutMs = 20000;
     /** Number of additional attempts made after the first attempt times out. */
     public static final int retry = 2;
    
     /**
      * Downloads {@code urlStr} into the file {@code filePath}, giving each
      * attempt at most {@link #timeOutMs} milliseconds.
      *
      * @param urlStr   address of the resource to download
      * @param filePath path of the file the content is written to
      * @throws RuntimeException if every attempt timed out, or if the calling
      *                          thread was interrupted while waiting
      */
     private static void download(String urlStr, String filePath) {
      for (int attempt = 0; attempt <= retry; attempt++) {
       if (attempt > 0) {
        System.out.println("retry");
       }
       if (finishesInTime(new DownloadThread(urlStr, filePath))) {
        return;
       }
      }
      throw new RuntimeException("still timeout after retry " + retry + " times");
     }
    
    
     /**
      * Fetches the text content of {@code urlStr}, giving each attempt at most
      * {@link #timeOutMs} milliseconds.
      *
      * @param urlStr address of the page to fetch
      * @return the {@code html} field of the worker that finished in time
      * @throws RuntimeException if every attempt timed out, or if the calling
      *                          thread was interrupted while waiting
      */
     private static String getHtml(String urlStr) {
      for (int attempt = 0; attempt <= retry; attempt++) {
       if (attempt > 0) {
        System.out.println("retry");
       }
       GetHtmlThread worker = new GetHtmlThread(urlStr);
       if (finishesInTime(worker)) {
        return worker.html;
       }
      }
      throw new RuntimeException("still timeout after retry " + retry + " times");
     }
    
     /**
      * Starts {@code worker} and waits up to {@link #timeOutMs} milliseconds
      * for it to terminate.
      *
      * @param worker a thread that has not been started yet
      * @return {@code true} if the worker terminated within the timeout,
      *         {@code false} if it was still running and has been interrupted
      * @throws RuntimeException if the calling thread is interrupted while
      *                          waiting; its interrupt status is restored first
      */
     private static boolean finishesInTime(Thread worker) {
      worker.start();
      try {
       worker.join(timeOutMs);
      } catch (InterruptedException e) {
       // The caller was asked to stop: cancel the worker and give up instead
       // of silently counting the interruption as one more timeout.
       worker.interrupt();
       Thread.currentThread().interrupt();
       throw new RuntimeException("interrupted while waiting for " + worker.getName(), e);
      }
      if (!worker.isAlive()) {
       return true;
      }
      // In practice interrupt() does not end a thread that is blocked in stream
      // I/O; it only sets the interrupt flag, so the abandoned worker may keep
      // running until its current read returns (see articles on how to
      // interrupt Java threads).
      worker.interrupt();
      return false;
     }
    }
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;
    
    /**
     * Worker thread that reads the text content of a URL line by line and
     * stores it in {@link #html}.
     */
    public class GetHtmlThread extends Thread {
    
     /**
      * The fetched text with every line terminated by '\n'. Remains
      * {@code null} if the thread is interrupted before the whole response
      * has been read.
      */
     public String html;
     private final String urlStr;
    
     /**
      * @param urlStr address of the page to fetch
      */
     public GetHtmlThread(String urlStr) {
      this.urlStr = urlStr;
     }
    
     /**
      * Sleeps for {@code Main.sleepMsPerConnection} milliseconds, then reads the
      * URL and publishes the result in {@link #html}. Any failure other than
      * an interruption prints the stack trace and terminates the JVM with
      * exit status 1.
      */
     @Override
     public void run() {
      try {
       Thread.sleep(Main.sleepMsPerConnection);
       StringBuilder sb = new StringBuilder();
       // try-with-resources closes the reader (and the underlying URL stream)
       // even when readLine() throws.
       try (BufferedReader br = new BufferedReader(
         new InputStreamReader(new URL(urlStr).openStream()))) {
        String line;
        while ((line = br.readLine()) != null) {
         if (Thread.currentThread().isInterrupted()) {
          // The thread was interrupted: stop reading and do not publish a
          // partial page.
          return;
         }
         sb.append(line);
         sb.append('\n');
        }
       }
       this.html = sb.toString();
      } catch (InterruptedException ignored) {
       // Interrupted during the initial sleep: treated as a cancellation, so
       // the thread ends quietly and html stays null.
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }
    }
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.URL;
    
    /**
     * Worker thread that copies the content of a URL into a local file.
     */
    public class DownloadThread extends Thread {
    
     private final String urlStr;
     private final String filePath;
    
     /**
      * @param urlStr   address of the resource to download
      * @param filePath path of the file the content is written to
      */
     public DownloadThread(String urlStr, String filePath) {
      this.urlStr = urlStr;
      this.filePath = filePath;
     }
    
     /**
      * Opens the URL, then the target file, and copies all bytes across.
      * Any failure prints the stack trace and terminates the JVM with exit
      * status 1.
      */
     @Override
     public void run() {
      // try-with-resources closes both streams even when opening the file or
      // copying fails; the URL is opened first so that a failed connection
      // does not create or truncate the target file.
      try (InputStream is = new URL(urlStr).openStream();
        OutputStream os = new FileOutputStream(new File(filePath))) {
       copyStream(is, os);
      } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
      }
     }
     
     /**
      * Copies bytes from {@code inputStream} to {@code outputStream} until the
      * input is exhausted or the current thread is interrupted, then flushes
      * the output. The caller still has to close both streams afterwards.
      *
      * @param inputStream  source of the bytes
      * @param outputStream destination of the bytes
      * @throws IOException if reading or writing fails
      */
     private void copyStream(InputStream inputStream, OutputStream outputStream)
       throws IOException {
      byte[] buffer = new byte[1024];
      int len;
      // Checking the interrupt flag between reads lets an interrupted thread
      // stop as soon as its current read returns instead of continuing to
      // write the file.
      while (!Thread.currentThread().isInterrupted()
        && (len = inputStream.read(buffer)) > 0) {
       outputStream.write(buffer, 0, len);
      }
      outputStream.flush();
     }
    }
  • 相关阅读:
    sql server 删除重复数据新思路
    sqlserver 迁移 mysql
    ASP.NET Web deployment task failed. 请与服务器管理员联系,检查授权和委派设置 部署任务失败的解决方案
    数据库交互之减少IO次数
    sqlserver 安全设置
    windows设置相对路径的快捷方式
    利用SignalR实现实时推送信息
    image magick 备忘
    dotnetCore开发中遇到的一些问题
    “NETSDK1061: 项目是使用 Microsoft.NETCore.App 版本 2.1.14 还原的, 但使用当前设置, 将改用版本 2.1.0。”的处理方法
  • 原文地址:https://www.cnblogs.com/simonshi/p/2308497.html
Copyright © 2011-2022 走看看