zoukankan      html  css  js  c++  java
  • 爬取中国信用黑名单网站图片和数据到本地

     2 
     3 import java.io.File;
     4 import java.io.IOException;
     5 import java.io.InputStream;
     6 import java.net.URL;
     7 import java.net.URLConnection;
     8 
     9 import org.apache.commons.io.FileUtils;
    10 
    11 
    12 
    13 public class SpiderDemo {
    14     public static void main(String[] args) throws IOException {
    15 //        URL url = new URL("http://www.zhongguoxinyongheimingdan.com");
    16 //        URLConnection connection = url.openConnection();
    17 //        InputStream in = connection.getInputStream();
    18 //        File file = new File("F://a.txt");
    19 //        FileUtils.copyInputStreamToFile(in, file);
    20         File srcDir = new File("F://a.txt");
    21         String str = FileUtils.readFileToString(srcDir, "UTF-8");
    22         String[] str1 = str.split("href=");
    23         for (int i = 3; i < str1.length-1; i++) {
    24             URL url = new URL("http://www.zhongguoxinyongheimingdan.com"+str1[i].substring(1, 27));
    25             File f = new File("F://abc//"+str1[i].substring(2, 22));
    26             if(!f.exists()){
    27             f.mkdir();    
    28             File desc1 = new File(f,str1[i].substring(1, 22)+".txt");
    29             URLConnection connection = url.openConnection();
    30             InputStream in = connection.getInputStream();
    31             FileUtils.copyInputStreamToFile(in, desc1);
    32             String str2 = FileUtils.readFileToString(desc1, "UTF-8");
    33             String[] str3 = str2.split("" src="");
    34             for(int j = 1;j<str3.length-2;j++){
    35                 URL url1 = new URL(str3[j].substring(0, 81));
    36                 URLConnection connection1 = url1.openConnection();
    37                 connection1.setDoInput(true);
    38                 InputStream in1 = connection1.getInputStream();
    39                 File desc2 = new File(f,str3[j].substring(44,76)+".jpg");
    40                 FileUtils.copyInputStreamToFile(in1, desc2);
    41             }
    42             }
    43             }
    44         }
    45     
    46 }
  • 相关阅读:
    nginx.conf中配置laravel框架站点
    centos6.4下安装php7+nginx+mariadb环境
    Windows Terminal 安装和运行
    微软 WSL 重装操作系统
    Pulumi 如何在 Windows 环境中设置
    Ubuntu 20.04 安装 JDK
    代码的 Lint 是什么意思
    CentOS 8 手动安装 Go 1.16 版本
    Raspberry Pi 安装 go 后提示错误 Exec format error
    系统管理--查看网卡、内存等
  • 原文地址:https://www.cnblogs.com/bianqi/p/6404066.html
Copyright © 2011-2022 走看看