zoukankan      html  css  js  c++  java
  • 抓取RSS源中链接图片问题REDIRECT_LOCATIONS 悟寰轩

    抓取链接里的图片时,图片的相对地址通常和RSS源里的url并不对应(中间可能会有跳转),因此需要先取得跳转后的真实地址,再拼出图片的绝对路径,用到了如下方法:

       /**
        * Converts a relative resource reference taken from a page into an absolute URL.
        *
        * <p>Only references starting with {@code "/"}, {@code "./"} or {@code "../"} are
        * resolved; any other {@code src} is returned unchanged. The base used for resolution
        * is the value returned by {@link #getRealLink(String)} for {@code link}, so the
        * reference is resolved against the post-redirect address rather than the feed's URL.
        *
        * <p>Resolution is delegated to {@link URI#resolve(URI)} instead of manual string
        * slicing, so it does not depend on the scheme length ({@code http://} vs
        * {@code https://}), tolerates a base without a path, and ignores the base's query
        * string and fragment.
        *
        * <p>NOTE(review): when {@code src} contains more leading {@code ../} segments than the
        * base path has directories, the result is whatever {@code java.net.URI} produces for
        * that case — confirm it is acceptable to callers.
        *
        * @param src  the reference as found in the page (for example an {@code img} src)
        * @param link the URL of the page the reference was found in
        * @return the absolute URL, or {@code src} unchanged when it is not one of the handled
        *         relative forms or when either value cannot be parsed as a URI
        */
       private String getWebRealPath(String src, String link) {
           if (src == null || link == null) {
               return src;
           }
           boolean relative = src.startsWith("/") || src.startsWith("./") || src.startsWith("../");
           if (!relative) {
               return src;
           }

           // Look up the real link only when there is something to resolve.
           String realLink = getRealLink(link);
           if (realLink == null) {
               return src;
           }

           try {
               URI base = URI.create(realLink);
               if (!base.isAbsolute() || base.isOpaque()) {
                   // Without a hierarchical, absolute base there is nothing to resolve against.
                   return src;
               }
               String basePath = base.getRawPath();
               if (basePath == null || basePath.isEmpty()) {
                   // "http://host" has an empty path; give it a root so that "./x" and "../x"
                   // are resolved below the host instead of being glued onto the authority.
                   base = base.resolve("/");
               }
               return base.resolve(URI.create(src)).toString();
           } catch (IllegalArgumentException e) {
               // URI.create rejects strings that are not valid URIs (e.g. containing spaces);
               // keep the best-effort contract and hand back the reference untouched.
               return src;
           }
       }
    /**
     * Returns the last redirect location recorded while requesting {@code link}.
     *
     * <p>The request is issued through {@code HttpUtil.get} with a fresh
     * {@code BasicHttpContext}; afterwards the context attribute
     * {@code DefaultRedirectStrategy.REDIRECT_LOCATIONS} is read and, if it holds any
     * locations, the last one is returned as a string.
     * NOTE(review): {@code HttpUtil} is not visible here — presumably it performs an HTTP GET
     * that follows redirects and records them in the supplied context; confirm.
     *
     * <p>This is a best-effort lookup: if the request fails for any reason, or no redirect
     * was recorded, the original {@code link} is returned.
     *
     * @param link the URL to request
     * @return the final redirect location, or {@code link} itself when none is available
     */
    private String getRealLink(String link) {
        try {
            HttpContext httpContext = new BasicHttpContext();
            HttpUtil.get(link, "iso-8859-1", httpContext);
            RedirectLocations redirectLocations = (RedirectLocations) httpContext
                    .getAttribute(DefaultRedirectStrategy.REDIRECT_LOCATIONS);
            if (redirectLocations == null) {
                return link;
            }
            // Typed list: a raw List would hand back Object and not compile as URI.
            List<URI> uriList = redirectLocations.getAll();
            if (uriList == null || uriList.isEmpty()) {
                // No hop recorded; do not rely on an IndexOutOfBoundsException to get here.
                return link;
            }
            return uriList.get(uriList.size() - 1).toString();
        } catch (Exception e) {
            // Deliberately broad: the exceptions thrown by HttpUtil.get are not visible here,
            // and a failed lookup must fall back to the original link rather than propagate.
            return link;
        }
    }

     

  • 相关阅读:
    MyBatis 基础搭建及架构概述
    Effective Java
    Effective Java
    Spring注解?啥玩意?
    Spring 中的Null-Safety
    Spring Resource框架体系介绍
    内部类的用法
    一文了解ConfigurationConditon接口
    详解状态压缩动态规划算法
    【硬核】使用替罪羊树实现KD-Tree的增删改查
  • 原文地址:https://www.cnblogs.com/sunxucool/p/2800014.html
Copyright © 2011-2022 走看看