zoukankan      html  css  js  c++  java
  • Java中Web页面信息获取

    package com.imooc.regex;
    
    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * Extracts list entries from an HTML file with a regular expression.
     *
     * <p>The program reads an HTML file as UTF-8 text and prints, for every
     * {@code <li>} element whose text is 2 to 8 characters in the range
     * U+4E00 to U+9FA5 immediately followed by one or more ASCII letters, three
     * lines: the whole match, the first captured group and the second captured
     * group.
     */
    public class RegexSample {

        /** File that is read when no path is supplied on the command line. */
        private static final String DEFAULT_SAMPLE_PATH =
                "D:\\eclipse-workspace\\regex\\WebContent\\sample.html";

        /**
         * Matches {@code <li>XXxx</li>} where group 1 is the leading run of 2 to 8
         * characters in U+4E00..U+9FA5 and group 2 is the trailing run of ASCII
         * letters. Compiled once because {@link Pattern} is immutable and reusable.
         */
        private static final Pattern LIST_ITEM =
                Pattern.compile("<li>([\u4e00-\u9fa5]{2,8})([a-zA-Z]+)</li>");

        /**
         * Program entry point.
         *
         * @param args optional; {@code args[0]} is the path of the HTML file to scan.
         *             When absent, {@link #DEFAULT_SAMPLE_PATH} is used.
         */
        public static void main(String[] args) {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SAMPLE_PATH;

            String content;
            try {
                content = readContent(path);
            } catch (IOException e) {
                // Without the file there is nothing to match; report and stop.
                System.err.println("Cannot read " + path + ": " + e);
                return;
            }

            // Build a matcher over the file content and walk through every match.
            Matcher m = LIST_ITEM.matcher(content);
            while (m.find()) {
                System.out.println(m.group(0)); // the whole matched <li> element
                System.out.println(m.group(1)); // first group
                System.out.println(m.group(2)); // second group
            }
        }

        /**
         * Reads the whole file as UTF-8 text, terminating every line with {@code '\n'}.
         *
         * @param path location of the file to read
         * @return the file content with normalized line terminators
         * @throws IOException if the file cannot be opened or read
         */
        private static String readContent(String path) throws IOException {
            StringBuilder content = new StringBuilder();
            // try-with-resources closes the reader (and the wrapped streams) even on failure.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append('\n');
                }
            }
            return content.toString();
        }

    }
  • 相关阅读:
    root----TH1
    linux debug
    python基础 文件操作
    linux下挂载硬盘
    安装双系统
    路由器相关
    007-python 获取网卡实时流量(转)
    ssh密钥登录失败(转)
    当while read line 遇到 ssh(转)
    python实用功能之001-邮件发送
  • 原文地址:https://www.cnblogs.com/wuheng-123/p/13715123.html
Copyright © 2011-2022 走看看