zoukankan      html  css  js  c++  java
  • 去掉HTML代码 保留文字和图片

    取得HTML中的img

            /// <summary>
            /// Extracts the <c>src</c> URL of every <c>&lt;img&gt;</c> tag found in an HTML fragment.
            /// </summary>
            /// <param name="sHtmlText">The HTML markup to scan. May be null or empty.</param>
            /// <returns>
            /// The image URLs in document order, one entry per matched <c>&lt;img&gt;</c> tag;
            /// an empty array when the input is null/empty or contains no image tags.
            /// </returns>
            public static string[] GetHtmlImageUrlList(string sHtmlText)
            {
                // Nothing to scan: return an empty list rather than letting Regex throw on null.
                if (string.IsNullOrEmpty(sHtmlText))
                {
                    return new string[0];
                }

                // Matches an <img ...> tag and captures the src attribute value, which may be
                // wrapped in double quotes, single quotes, or left unquoted.
                // NOTE: the whitespace classes must be the \s escape. A bare "s" inside the
                // character classes would stop the captured URL at the first letter 's'
                // (e.g. "https://..." would be captured as "http").
                const string imgPattern =
                    @"<img[^<>]*?src\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>";

                // The static Regex.Matches overload reuses the framework's cache of parsed patterns.
                MatchCollection matches = Regex.Matches(sHtmlText, imgPattern, RegexOptions.IgnoreCase);

                // Copy the captured URLs out of the match collection.
                string[] sUrlList = new string[matches.Count];
                for (int i = 0; i < sUrlList.Length; i++)
                {
                    sUrlList[i] = matches[i].Groups["imgUrl"].Value;
                }

                return sUrlList;
            }

    取得HTML中的文字

     /// <summary>
            /// Strips HTML markup from a string and returns the remaining plain text.
            /// </summary>
            /// <remarks>
            /// Script and style blocks and HTML comments are removed together with their content,
            /// all remaining tags are dropped, line breaks followed by whitespace are collapsed,
            /// and a small set of character entities (quot, amp, lt, gt, nbsp, iexcl, cent, pound,
            /// copy) is decoded. Any other numeric entity (<c>&amp;#NNN;</c>) is discarded.
            /// </remarks>
            /// <param name="htmlString">The HTML markup to clean. May be null or empty.</param>
            /// <returns>The extracted text, or an empty string when the input is null or empty.</returns>
            public static string NoHTML(string htmlString)
            {
                if (string.IsNullOrEmpty(htmlString))
                {
                    return string.Empty;
                }

                // Remove script/style blocks including their bodies. Singleline lets '.' span
                // line breaks, otherwise multi-line blocks would leak their source into the text.
                htmlString = Regex.Replace(htmlString, @"<(script|style)\b[^>]*>.*?</\1\s*>", "",
                    RegexOptions.IgnoreCase | RegexOptions.Singleline);

                // Remove complete comments before generic tag stripping, so that a '>' inside a
                // comment does not end the match early and leak the rest of the comment.
                htmlString = Regex.Replace(htmlString, @"<!--.*?-->", "", RegexOptions.Singleline);

                // Remove all remaining tags.
                htmlString = Regex.Replace(htmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

                // Drop a line break together with the whitespace that follows it.
                htmlString = Regex.Replace(htmlString, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

                // Clean up leftovers of unbalanced comment markers.
                htmlString = Regex.Replace(htmlString, @"-->", "", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"<!--.*", "", RegexOptions.IgnoreCase);

                // Decode the supported character entities.
                htmlString = Regex.Replace(htmlString, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(nbsp|#160);", "   ", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

                // Discard every numeric entity that was not decoded above.
                htmlString = Regex.Replace(htmlString, @"&#(\d+);", "", RegexOptions.IgnoreCase);

                // Decode '&' last: doing it earlier would turn "&amp;lt;" into "&lt;" and then
                // into "<", decoding the text twice.
                htmlString = Regex.Replace(htmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

                return htmlString;
            }
  • 相关阅读:
    dede list调用 内容模型 附件
    dedecms list标签调用附加表字段--绝对成功
    织梦系统站点首页、列表、文章页等页面点击数调用方法
    apache include 文件包含引用的方法 报错 [an error occurred while processing this directive]
    windows 2008 中IIS7.0以上如何设置404错误页面
    织梦Fatal error: Call to a member function GetInnerText()
    dedecms清空栏目后,新建ID不从1开始的解决方法
    js禁止
    AngularJs表单验证
    Sublime 安装,汉化,插件
  • 原文地址:https://www.cnblogs.com/YorkQi/p/13924529.html
Copyright © 2011-2022 走看看