zoukankan      html  css  js  c++  java
  • Java 对 URL 地址中 HTML 实体符号的处理

    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-lang3</artifactId>
                <version>3.4</version>
            </dependency>
    -->
    
    
    public static String getNextPage(String web) throws Exception {
            HttpComponentsClientHttpRequestFactory factory=new HttpComponentsClientHttpRequestFactory();
    //        factory.setConnectTimeout(60000);
    //        factory.set
            String regx="上一页</a>)(<a.*?href=[\"']?(((http|https)?://)?/?[^\"']+)[\"']?.*?>(.+)</a>";
            RestTemplate template=new RestTemplate();
            URI uri=new URI(URLDecoder.decode(web,"utf-8"));
            String stri = template.getForObject(uri, String.class);
            Pattern pattern=Pattern.compile(regx);
            Matcher matcher = pattern.matcher(stri);
            matcher.find();
            String group = matcher.group();
            group = group.substring(group.indexOf("href="/") + 7, group.indexOf("" title=""));
            group="http://www.youbianku.com/"+group;
            group= StringEscapeUtils.unescapeHtml4(group);
            return group;
    
        }
  • 相关阅读:
    Zookeeper数据类型
    Zookeeper基本命令
    Redis集群
    Mysql 模拟自增主键
    git回滚版本操作
    Redis缓存穿透和雪崩
    日期格式jackson格式化
    Zookeeper安装
    redis主从复制
    Redis哨兵模式
  • 原文地址:https://www.cnblogs.com/wangyang108/p/6010145.html
Copyright © 2011-2022 走看看