zoukankan      html  css  js  c++  java
  • Java 对 URL 地址中 HTML 实体符号的处理

    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-lang3</artifactId>
                <version>3.4</version>
            </dependency>
    -->
    
    
    public static String getNextPage(String web) throws Exception {
            HttpComponentsClientHttpRequestFactory factory=new HttpComponentsClientHttpRequestFactory();
    //        factory.setConnectTimeout(60000);
    //        factory.set
            String regx="上一页</a>)(<a.*?href=[\"']?(((http|https)?://)?/?[^\"']+)[\"']?.*?>(.+)</a>";
            RestTemplate template=new RestTemplate();
            URI uri=new URI(URLDecoder.decode(web,"utf-8"));
            String stri = template.getForObject(uri, String.class);
            Pattern pattern=Pattern.compile(regx);
            Matcher matcher = pattern.matcher(stri);
            matcher.find();
            String group = matcher.group();
            group = group.substring(group.indexOf("href="/") + 7, group.indexOf("" title=""));
            group="http://www.youbianku.com/"+group;
            group= StringEscapeUtils.unescapeHtml4(group);
            return group;
    
        }
  • 相关阅读:
    前端框架AngularJS入门
    springcloud---Eureka yml
    springcloud 的Eureka配置
    虚拟机中文件和目录的相关操作
    SpringBoot整合Redis
    IDEA整合GIT操作
    Ubuntu 安装步骤
    Centos 7 安装
    Centos 6 安装步骤
    防止root口令被破解
  • 原文地址:https://www.cnblogs.com/wangyang108/p/6010145.html
Copyright © 2011-2022 走看看