zoukankan      html  css  js  c++  java
  • Java实现web页面内容抓取

     1 package demo;
     2 
     3 import java.io.BufferedReader;
     4 import java.io.IOException;
     5 import java.io.InputStream;
     6 import java.io.InputStreamReader;
     7 
     /**
      * Fetches the textual content of a web page.
      *
      * @author sy
      */
     public class GrabWebHtml {

         /**
          * Demo entry point: downloads a fixed page and prints it to standard output.
          *
          * @param args command-line arguments (unused)
          */
         public static void main(String[] args) {
             String url = "http://www.baidu.com";
             System.out.println(getWebHtml(url));
         }

         /**
          * Reads everything available at the given URL as UTF-8 text.
          *
          * <p>The content is read line by line and every line is terminated with
          * {@code "\n"} in the result, regardless of the line terminator used by
          * the source.
          *
          * <p>An {@link IOException} raised while opening or reading the URL is not
          * propagated: its stack trace is printed and whatever was read up to that
          * point (possibly the empty string) is returned.
          *
          * @param domain the URL to read, in a form accepted by {@link java.net.URL}
          * @return the text that was read, one {@code "\n"} after each line
          */
         public static String getWebHtml(String domain) {
             // Local, single-threaded accumulation: StringBuilder is sufficient.
             StringBuilder sb = new StringBuilder();
             // try-with-resources closes the reader and the underlying stream exactly
             // once, on both the normal and the exceptional path.
             try (InputStream is = new java.net.URL(domain).openStream();
                  BufferedReader in = new BufferedReader(
                          new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
                 String line;
                 while ((line = in.readLine()) != null) {
                     sb.append(line).append("\n");
                 }
             } catch (IOException e) {
                 // Best-effort contract kept from the original: report and fall through.
                 e.printStackTrace();
             }
             return sb.toString();
         }

     }
  • 相关阅读:
    clickhouse群集模式搭建
    基于Att&ck模型的整体威胁框架方法论
    应急响应Windows各种操作记录备份
    代理总结
    Linux应急响应日志分析
    Web漏洞利用框架
    Suricata策略记录
    应急响应汇总
    IDS&IPSSuricata介绍
    ATT&CKMitreInitial Access(初始化访问)
  • 原文地址:https://www.cnblogs.com/wanying521/p/5179304.html
Copyright © 2011-2022 走看看