zoukankan      html  css  js  c++  java
  • Java 获取网页指定内容-2(实践+修改)

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.Arrays;
    
    
    /**
     * Downloads a web page and prints one of the text fragments found inside
     * its {@code <p class="wea">...</p>} elements.
     *
     * <p>Every fragment matched while reading the page is appended to {@link #sb},
     * separated by commas; {@link #run()} then prints the fragment that sits
     * {@value #OFFSET_FROM_END} positions from the end of that list.
     */
    public class Weather {
        /** Captures the text between {@code <p class="wea">} and {@code </p>} on a single line. */
        private static final Pattern WEA_PATTERN = Pattern.compile("<p class=\"wea\">(.+?)</p>");

        /** Position, counted from the end of the collected fragments, that {@link #run()} prints. */
        private static final int OFFSET_FROM_END = 7;

        /** Address of the page to download. */
        String urlString;
        /** The most recently matched fragment, or {@code null} if nothing has matched yet. */
        String array;
        /** Every matched fragment, each followed by a comma. */
        StringBuffer sb = new StringBuffer("");

        public static void main(String[] args) throws Exception {
            Weather client = new Weather("http://www.weather.com.cn/weather/101181201.shtml");
            client.run();
        }

        /**
         * @param urlString address of the page to download
         */
        public Weather(String urlString) {
            this.urlString = urlString;
        }

        /**
         * Reads the page line by line as UTF-8, collects every
         * {@code <p class="wea">} fragment and prints the one that is
         * {@value #OFFSET_FROM_END} positions from the end.
         *
         * @throws IllegalStateException if fewer than {@value #OFFSET_FROM_END} fragments were found
         * @throws Exception if the URL is malformed or the page cannot be read
         */
        public void run() throws Exception {
            URL url = new URL(urlString);
            HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();

            // try-with-resources closes the reader (and the underlying stream) even on failure.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    appendMatches(line);
                }
            } finally {
                urlConnection.disconnect();
            }

            System.out.println(entryFromEnd(sb.toString(), OFFSET_FROM_END));
        }

        /**
         * Appends every fragment matched in {@code line} to {@link #sb}, each followed
         * by a comma, and remembers the last one in {@link #array}.
         *
         * @param line one line of the downloaded page
         */
        void appendMatches(String line) {
            Matcher m = WEA_PATTERN.matcher(line);
            while (m.find()) {
                array = m.group(1);
                sb.append(array).append(',');
            }
        }

        /**
         * Returns the entry that is {@code offsetFromEnd} positions from the end of a
         * comma-separated list (1 = last entry).
         *
         * @param joined comma-separated entries; a trailing comma is ignored
         * @param offsetFromEnd 1-based position counted from the end
         * @return the selected entry
         * @throws IllegalStateException if the list has fewer than {@code offsetFromEnd} entries
         */
        static String entryFromEnd(String joined, int offsetFromEnd) {
            // split(",") drops trailing empty strings, so a trailing comma adds no entry.
            String[] entries = joined.split(",");
            if (joined.isEmpty() || offsetFromEnd < 1 || entries.length < offsetFromEnd) {
                throw new IllegalStateException(
                        "Expected at least " + offsetFromEnd + " entries but found "
                                + (joined.isEmpty() ? 0 : entries.length));
            }
            return entries[entries.length - offsetFromEnd];
        }
    }

    UTF-8 编码格式(同一程序,附带注释):

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.Arrays;
    
    
    /**
     * Downloads a web page and prints one of the text fragments found inside
     * its {@code <p class="wea">...</p>} elements.
     *
     * <p>Every fragment matched while reading the page is appended to {@link #sb},
     * separated by commas; {@link #run()} then prints the fragment that sits
     * {@value #OFFSET_FROM_END} positions from the end of that list.
     */
    public class Weather {
        /** Captures the text between {@code <p class="wea">} and {@code </p>} on a single line. */
        private static final Pattern WEA_PATTERN = Pattern.compile("<p class=\"wea\">(.+?)</p>");

        /** Position, counted from the end of the collected fragments, that {@link #run()} prints. */
        private static final int OFFSET_FROM_END = 7;

        /** Address of the page to download. */
        String urlString;
        /** The most recently matched fragment, or {@code null} if nothing has matched yet. */
        String array;
        /** Every matched fragment, each followed by a comma. */
        StringBuffer sb = new StringBuffer("");

        public static void main(String[] args) throws Exception {
            Weather client = new Weather("http://www.weather.com.cn/weather/101181201.shtml");
            client.run();
        }

        /**
         * @param urlString address of the page to download
         */
        public Weather(String urlString) {
            this.urlString = urlString;
        }

        /**
         * Reads the page line by line as UTF-8, collects every
         * {@code <p class="wea">} fragment and prints the one that is
         * {@value #OFFSET_FROM_END} positions from the end.
         *
         * @throws IllegalStateException if fewer than {@value #OFFSET_FROM_END} fragments were found
         * @throws Exception if the URL is malformed or the page cannot be read
         */
        public void run() throws Exception {
            URL url = new URL(urlString);
            HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();

            // try-with-resources closes the reader (and the underlying stream) even on failure.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    appendMatches(line);
                }
            } finally {
                urlConnection.disconnect();
            }

            // Convert sb to a string, split it on commas and take the 7th entry from the end.
            System.out.println(entryFromEnd(sb.toString(), OFFSET_FROM_END));
        }

        /**
         * Appends every fragment matched in {@code line} to {@link #sb}, each followed
         * by a comma, and remembers the last one in {@link #array}.
         *
         * @param line one line of the downloaded page
         */
        void appendMatches(String line) {
            Matcher m = WEA_PATTERN.matcher(line);
            while (m.find()) {
                array = m.group(1);
                // Data matching the regex is appended to sb, separated by commas.
                sb.append(array).append(',');
            }
        }

        /**
         * Returns the entry that is {@code offsetFromEnd} positions from the end of a
         * comma-separated list (1 = last entry).
         *
         * @param joined comma-separated entries; a trailing comma is ignored
         * @param offsetFromEnd 1-based position counted from the end
         * @return the selected entry
         * @throws IllegalStateException if the list has fewer than {@code offsetFromEnd} entries
         */
        static String entryFromEnd(String joined, int offsetFromEnd) {
            // The string becomes an array, using the comma as the delimiter;
            // split(",") drops trailing empty strings, so a trailing comma adds no entry.
            String[] entries = joined.split(",");
            if (joined.isEmpty() || offsetFromEnd < 1 || entries.length < offsetFromEnd) {
                throw new IllegalStateException(
                        "Expected at least " + offsetFromEnd + " entries but found "
                                + (joined.isEmpty() ? 0 : entries.length));
            }
            return entries[entries.length - offsetFromEnd];
        }
    }
  • 相关阅读:
    Java检测文件是否UTF8编码
    Linux: uid/euid/suid的关系
    位移运算
    Springmvc 重定向参数传递方式
    @RequestBody和@RequestParam区别
    jsp页面老提示Multiple annotations found at this line:
    滚动表格代码
    滚动条样式修改
    WebService的四种客户端调用方式
    table元素的td和ul元素li隔行变色
  • 原文地址:https://www.cnblogs.com/hellowzd/p/4991796.html
Copyright © 2011-2022 走看看