zoukankan      html  css  js  c++  java
  • 通过文本或url扫描下载文件

      1 package com.xxxx;
      2 
      3 import java.io.BufferedInputStream; 
      4 import java.io.BufferedReader; 
      5 import java.io.File; 
      6 import java.io.FileNotFoundException; 
      7 import java.io.FileOutputStream; 
      8 import java.io.IOException; 
      9 import java.io.InputStreamReader; 
     10 import java.net.MalformedURLException; 
     11 import java.net.URL; 
     12 import java.util.ArrayList;
     13 import java.util.List;
     14 import java.util.regex.Matcher; 
     15 import java.util.regex.Pattern; 
     16  
     /**
      * Scans HTML (given directly or fetched from a URL) for image references and
      * downloads every referenced file below {@link #DOWNLOAD_ROOT}.
      *
      * <p>Errors while downloading a single file are reported on standard error and do
      * not abort the scan.
      */
     public class GetImage {

         /** Local directory under which downloaded files are stored, mirroring the URL path. */
         private static final String DOWNLOAD_ROOT = "F:\\FocuSimple";

         /**
          * Matches a {@code src} attribute whose value ends in a known image extension.
          * Group 1 is the optional opening quote, group 2 the attribute value.
          */
         private static final Pattern IMAGE_SRC = Pattern.compile(
                 "src\\b\\s*=\\s*('|\")?([^'\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*",
                 Pattern.CASE_INSENSITIVE);

         /**
          * Matches a quoted, site-relative jpg/png/gif path in a {@code src} or
          * {@code background} attribute. Group 3 is the path without its leading slash.
          */
         private static final Pattern RELATIVE_IMAGE = Pattern.compile(
                 "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

         /**
          * Matches a quoted absolute {@code http://} jpg/png/gif address in a {@code src}
          * or {@code background} attribute. Group 3 is the full address.
          */
         private static final Pattern ABSOLUTE_IMAGE = Pattern.compile(
                 "(?x)(src|SRC|background|BACKGROUND)=('|\")(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

         /**
          * Returns the index of the {@code numb}-th {@code '/'} in {@code string}.
          *
          * @param string the text to search; may be {@code null}
          * @param numb   1-based occurrence to locate
          * @return the zero-based index of that slash, or {@code -1} when {@code string}
          *         is {@code null}, {@code numb} is not positive, or the string contains
          *         fewer than {@code numb} slashes
          */
         public int getCharacterPosition(String string, int numb) {
             if (string == null || numb <= 0) {
                 return -1;
             }
             int position = -1;
             for (int found = 0; found < numb; found++) {
                 position = string.indexOf('/', position + 1);
                 if (position < 0) {
                     return -1;
                 }
             }
             return position;
         }

         /**
          * Downloads a file (image, archive, ...) and stores it below
          * {@link #DOWNLOAD_ROOT}, in a sub-directory that mirrors the URL path.
          *
          * <p>Failures are reported on standard error; this method does not throw for
          * unusable addresses or I/O errors.
          *
          * @param httpUrl absolute address of the file,
          *                e.g. {@code http://www.xxxx.com/uploadfiles/123.rar}
          */
         public void getHtmlFile(String httpUrl) {
             if (httpUrl == null) {
                 System.err.println("Cannot download: address is null");
                 return;
             }
             System.out.println("取网络文件");

             // The third slash ends "scheme://host"; everything after it is the path.
             int pathStart = getCharacterPosition(httpUrl, 3);
             if (pathStart < 0) {
                 System.err.println("Cannot download, address has no path: " + httpUrl);
                 return;
             }
             int nameStart = httpUrl.lastIndexOf('/');
             String unitPath = httpUrl.substring(pathStart, nameStart);
             String fileName = httpUrl.substring(nameStart); // keeps the leading '/'
             if (fileName.length() <= 1) {
                 System.err.println("Cannot download, address has no file name: " + httpUrl);
                 return;
             }
             // Addresses come from scraped HTML; never let them climb out of the root.
             if (hasParentSegment(httpUrl.substring(pathStart))) {
                 System.err.println("Cannot download, address contains '..': " + httpUrl);
                 return;
             }
             String filePath = DOWNLOAD_ROOT + unitPath + "\\";

             try {
                 // Parse first so a malformed address leaves no empty directories behind.
                 URL url = new URL(httpUrl);

                 File up = new File(filePath);
                 if (!up.exists() && !up.mkdirs()) {
                     System.err.println("Cannot create directory: " + filePath);
                     return;
                 }

                 try (BufferedInputStream in = new BufferedInputStream(url.openStream());
                      FileOutputStream file = new FileOutputStream(new File(filePath + fileName))) {
                     byte[] buffer = new byte[8192];
                     int read;
                     while ((read = in.read(buffer)) != -1) {
                         file.write(buffer, 0, read);
                     }
                 }
                 System.out.println("文件获取成功");
             } catch (IOException e) {
                 // Also covers MalformedURLException and FileNotFoundException.
                 System.err.println("Failed to download " + httpUrl);
                 e.printStackTrace();
             }
         }

         /**
          * Fetches the document at {@code httpUrl} and returns its text.
          *
          * <p>Lines are concatenated without separators. The bytes are decoded with the
          * platform default charset.
          *
          * @param httpUrl absolute address of the document
          * @return the document text with line terminators removed
          * @throws IOException if the address is malformed or the document cannot be read
          */
         public String getHtmlCode(String httpUrl) throws IOException {
             URL pageUrl = new URL(httpUrl);
             StringBuilder content = new StringBuilder();
             // NOTE(review): no charset is specified, so decoding depends on the platform.
             try (BufferedReader reader =
                          new BufferedReader(new InputStreamReader(pageUrl.openStream()))) {
                 String line;
                 while ((line = reader.readLine()) != null) {
                     content.append(line);
                 }
             }
             return content.toString();
         }

         /**
          * Extracts the values of all {@code src} attributes that point at image files
          * and echoes each one to standard output.
          *
          * @param htmlCode HTML text to scan; {@code null} is treated as empty
          * @return the image addresses in document order; never {@code null}
          */
         public static List<String> getImageSrc(String htmlCode) {
             List<String> imageSrcList = extractImageSources(htmlCode);
             for (String src : imageSrcList) {
                 System.out.println("src" + src);
             }
             return imageSrcList;
         }

         /**
          * Scans HTML for image references and downloads each distinct one.
          *
          * @param url  address of the page; used to fetch the HTML when {@code text} is
          *             {@code null} and as the base for resolving relative references
          * @param text HTML to scan, or {@code null} to fetch it from {@code url}
          * @throws IOException if the page has to be fetched and cannot be read
          */
         public void get(String url, String text) throws IOException {
             String content = (text == null) ? this.getHtmlCode(url) : text;
             System.out.println("内容:" + content);

             // The three patterns overlap, so remember what was already fetched.
             List<String> downloaded = new ArrayList<String>();

             for (String src : extractImageSources(content)) {
                 downloadOnce(resolveUrl(url, src), src, downloaded);
             }

             Matcher matcher = RELATIVE_IMAGE.matcher(content);
             while (matcher.find()) {
                 System.out.println("图片路径1:" + matcher.group(3));
                 String path = matcher.group(3);
                 // Group 3 excludes the optional leading slash; restore it so that
                 // root-relative references resolve against the host, not the page.
                 if (content.charAt(matcher.start(3) - 1) == '/') {
                     path = "/" + path;
                 }
                 downloadOnce(resolveUrl(url, path), path, downloaded);
             }

             matcher = ABSOLUTE_IMAGE.matcher(content);
             while (matcher.find()) {
                 System.out.println("图片路径1:" + matcher.group(3));
                 downloadOnce(matcher.group(3), matcher.group(3), downloaded);
             }
         }

         public static void main(String[] args) throws IOException {
             GetImage gcp = new GetImage();
             gcp.get("http://www.123rf.com.cn/#baidu01", null);
             gcp.get(null, "<img src=\"/images/ico/logo.png\">");
             gcp.getHtmlFile("http://www.xxxx.com/uploadfiles/123.rar");
         }

         /** Collects the image-bearing {@code src} values of {@code htmlCode} in document order. */
         private static List<String> extractImageSources(String htmlCode) {
             List<String> sources = new ArrayList<String>();
             if (htmlCode == null) {
                 return sources;
             }
             Matcher m = IMAGE_SRC.matcher(htmlCode);
             while (m.find()) {
                 String quote = m.group(1);
                 // An unquoted value ends at the first whitespace.
                 String src = (quote == null || quote.trim().length() == 0)
                         ? m.group(2).split("\\s+")[0]
                         : m.group(2);
                 sources.add(src);
             }
             return sources;
         }

         /** Resolves {@code src} against {@code baseUrl}; returns {@code null} when that is impossible. */
         private static String resolveUrl(String baseUrl, String src) {
             try {
                 URL resolved = (baseUrl == null) ? new URL(src) : new URL(new URL(baseUrl), src);
                 return resolved.toString();
             } catch (MalformedURLException e) {
                 return null;
             }
         }

         /** Downloads {@code target} unless it is unresolved or was already downloaded. */
         private void downloadOnce(String target, String src, List<String> downloaded) {
             if (target == null) {
                 System.err.println("Cannot resolve image address: " + src);
                 return;
             }
             if (downloaded.contains(target)) {
                 return;
             }
             downloaded.add(target);
             this.getHtmlFile(target);
         }

         /** Tells whether any segment of {@code path} is {@code ".."}. */
         private static boolean hasParentSegment(String path) {
             for (String segment : path.split("[/\\\\]")) {
                 if ("..".equals(segment)) {
                     return true;
                 }
             }
             return false;
         }
     }
  • 相关阅读:
    超级好用的excel导出方法,比phpexcel快n倍,并且无乱码
    PHP生成随机数;订单号唯一
    php判断检测一个数组里有没有重复的值
    修改git 提交的用户名和用户Email命令
    利用 PHP CURL zip压缩文件上传
    Linux 重启 PHP-FPM 命令
    Postgresql 时间串转换格式
    rollup node.js 打包工具
    PHP正则表达式提取html超链接中的href地址
    解决Ubuntu系统下 mysql 远程连接失败的问题 ERROR 2003 (HY000): Can't connect to MySQL server on 'xxx.xxx.xx.xx' (110)
  • 原文地址:https://www.cnblogs.com/huanglibin/p/6846722.html
Copyright © 2011-2022 走看看