zoukankan      html  css  js  c++  java
  • 爬取中国信用黑名单网站图片和数据到本地

     2 
     3 import java.io.File;
     4 import java.io.IOException;
     5 import java.io.InputStream;
     6 import java.net.URL;
     7 import java.net.URLConnection;
     8 
     9 import org.apache.commons.io.FileUtils;
    10 
    11 
    12 
    13 public class SpiderDemo {
    14     public static void main(String[] args) throws IOException {
    15 //        URL url = new URL("http://www.zhongguoxinyongheimingdan.com");
    16 //        URLConnection connection = url.openConnection();
    17 //        InputStream in = connection.getInputStream();
    18 //        File file = new File("F://a.txt");
    19 //        FileUtils.copyInputStreamToFile(in, file);
    20         File srcDir = new File("F://a.txt");
    21         String str = FileUtils.readFileToString(srcDir, "UTF-8");
    22         String[] str1 = str.split("href=");
    23         for (int i = 3; i < str1.length-1; i++) {
    24             URL url = new URL("http://www.zhongguoxinyongheimingdan.com"+str1[i].substring(1, 27));
    25             File f = new File("F://abc//"+str1[i].substring(2, 22));
    26             if(!f.exists()){
    27             f.mkdir();    
    28             File desc1 = new File(f,str1[i].substring(1, 22)+".txt");
    29             URLConnection connection = url.openConnection();
    30             InputStream in = connection.getInputStream();
    31             FileUtils.copyInputStreamToFile(in, desc1);
    32             String str2 = FileUtils.readFileToString(desc1, "UTF-8");
    33             String[] str3 = str2.split("" src="");
    34             for(int j = 1;j<str3.length-2;j++){
    35                 URL url1 = new URL(str3[j].substring(0, 81));
    36                 URLConnection connection1 = url1.openConnection();
    37                 connection1.setDoInput(true);
    38                 InputStream in1 = connection1.getInputStream();
    39                 File desc2 = new File(f,str3[j].substring(44,76)+".jpg");
    40                 FileUtils.copyInputStreamToFile(in1, desc2);
    41             }
    42             }
    43             }
    44         }
    45     
    46 }
  • 相关阅读:
    inspector 只读属性
    使用MongoDB
    【Roslyn C#】Runtime环境Unity读取字符串代码
    Unity使用LoadImage 读取byte[]图片时,会出现白边问题
    团队中避免不可维护代码的措施
    Unity点到线段的最短距离
    判断点是否在多边形内部
    Unity 根据前后帧位置自动旋转
    停止Unity在运行时脚本修改重新编译的情况
    KI子线段树 / AKEE SegmentTree
  • 原文地址:https://www.cnblogs.com/bianqi/p/6404066.html
Copyright © 2011-2022 走看看