思路
系统配置了自定义404页面，该页面响应的Content-Type是text/html;charset=UTF-8，并且不带Content-Length头。因此对每个附件地址发送HTTP HEAD请求，判断响应中有没有Content-Length头。
如果有Content-Length头说明是正常的二进制附件。
import lombok.extern.slf4j.Slf4j; import org.apache.http.Header; import org.apache.http.HttpResponse; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.HttpHead; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.ssl.TrustStrategy; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSession; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.util.Collections; import java.util.List; import java.util.Properties; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.stream.Collectors; @Slf4j public class Main { static String folder =""; public static void main(String[] args) throws IOException{ Properties prop = System.getProperties(); String os = prop.getProperty("os.name"); if (os != null && os.toLowerCase().indexOf("linux") > -1) { folder="/home/erdpc/"; }else { folder="d:/"; } Main bootstrap = new Main(); List<String> files = bootstrap.readAttachments(); ExecutorService executor1 = Executors.newFixedThreadPool(50); for (int i = 0; i <files.size(); i++) { MyTask task = new MyTask(files.get(i).replace("\","/")); executor1.execute(task); } System.in.read(); //阻塞主线程 } static class MyTask implements Runnable { private String filename; public MyTask(String filename) { this.filename = filename; } @Override public void run() { try { String uri = "https://www.ksst-erdpc.cn/".concat(filename.replaceAll(" ","%20")); HttpHead httpHead = new HttpHead(uri); //巡检时更改为信任证书 CloseableHttpClient httpClient = buildDefaultHttpClientTrustSSL(); httpHead.addHeader("User-Agent", "Mozilla/5.0 (Windows 
NT 6.1; Win64; x64)spider"); httpHead.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"); httpHead.addHeader("Accept-Encoding", "gzip, deflate"); httpHead.addHeader("Accept-Language", "zh-CN,zh;q=0.9"); HttpResponse response = httpClient.execute(httpHead); Header contentLength = response.getFirstHeader("Content-Length"); Header contentType = response.getFirstHeader("Content-Type"); if(contentLength == null&&"text/html;charset=UTF-8".equals(contentType.getValue())) { write(filename.replaceAll("%20"," ").concat(" ")); } } catch (IOException e) { System.err.printf("文件检测出错:%s,信息:%s ",filename.replaceAll("%20"," "),e.getMessage()); write(filename.replaceAll("%20"," ").concat(" ").concat(e.getMessage())); } } @Override public String toString() { return "MyTask [filename=" + filename + "]"; } } private static void write(String url) { try { Files.write(Paths.get(folder.concat("error.txt")),url.getBytes(),StandardOpenOption.APPEND); } catch (IOException e) { e.printStackTrace(); } } private List<String> readAttachments() { try { List<String> files = Files.readAllLines(Paths.get(folder.concat("attachment.txt"))); System.out.println(files.size()); return files.stream().map(file->{ if(file.startsWith(""")) { file = file.substring(1); } if(file.endsWith(""")) { file= file.substring(0,file.length()-1); } return file; }).collect(Collectors.toList()); } catch (IOException e) { e.printStackTrace(); } return Collections.emptyList(); } /** * 信任SSL证书 * * @return */ public static CloseableHttpClient buildDefaultHttpClientTrustSSL() { SSLContext sslContext = null; try { sslContext = SSLContextBuilder.create().useProtocol(SSLConnectionSocketFactory.SSL).loadTrustMaterial(new TrustStrategy() { @Override public boolean isTrusted(X509Certificate[] x, String y) throws CertificateException { return true; } }).build(); } catch (Exception e) { e.printStackTrace(); } RequestConfig config = RequestConfig.custom() .setSocketTimeout(30000) 
.setConnectTimeout(30000) .setConnectionRequestTimeout(30000) .setContentCompressionEnabled(true) .build(); return HttpClientBuilder.create().setDefaultRequestConfig(config).setSSLContext(sslContext).setSSLHostnameVerifier(new HostnameVerifier() { @Override public boolean verify(String x, SSLSession y) { return true; } }).build(); }
程序运行结果
下图列出的是检测到已丢失的文件(其中部分路径是黑客扫描产生的数据)。
参考来源:
https://stackoverflow.com/questions/4992317/illegal-character-in-path-at-index-16