zoukankan      html  css  js  c++  java
  • 从FCKeditor生成HTML字段里获取文章内容和图片

    最近在做我们学校“微软创新之旅”的专题网站,文章内容是从原来的新闻系统里提取的。原来的新闻系统使用 FCKeditor 作为文本编辑器:用户只要直接输入文字、上传图片,就会生成相应的 HTML 代码并存入数据库。我想把已发布文章中的图片提取出来放在桌面,就必须从这段 HTML 代码里提取出图片地址;同时还需要把去掉 HTML 标签后的纯文本放入 RSS 供用户订阅。所以只能想个办法解决这两个问题,下面是我写的工具类。

    using System;
    using System.Data;
    using System.Configuration;
    using System.Web;
    using System.Web.Security;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Web.UI.WebControls.WebParts;
    using System.Web.UI.HtmlControls;
    using System.Text.RegularExpressions;
    
    
    /// <summary>
    /// Helpers for turning editor-generated HTML (such as FCKeditor output) into
    /// plain text and for extracting an image address from it.
    /// </summary>
    public class FiterHtml
    {
        // Whole script elements. Singleline lets '.' cross line breaks so that
        // scripts spanning several lines are removed together with their content.
        private static readonly Regex ScriptBlock = new Regex(
            @"<script[^>]*?>.*?</script>",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // An opening, closing or declaration tag. Quoted attribute values may contain '>'.
        // The alternatives inside the loop start with different characters, so the
        // pattern cannot backtrack exponentially on malformed (unterminated) tags.
        private static readonly Regex ElementTag = new Regex(
            @"</?!?(?:\w+:)?\w+(?:[^>""']|""[^""]*""|'[^']*')*>");

        // The named entities this class understands, plus any decimal character reference.
        private static readonly Regex Entity = new Regex(
            @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
            RegexOptions.IgnoreCase);

        // The src attribute of an img tag: double-quoted, single-quoted or unquoted.
        private static readonly Regex ImageSource = new Regex(
            @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        public FiterHtml() { }

        /// <summary>
        /// Removes scripts, tags and comments from an HTML fragment, decodes the
        /// supported entities, and returns the remaining text HTML-encoded and trimmed.
        /// </summary>
        /// <param name="Htmlstring">HTML fragment to clean; may be null or empty.</param>
        /// <returns>The HTML-encoded plain text, or an empty string for null or empty input.</returns>
        public static string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            string text = ScriptBlock.Replace(Htmlstring, "");
            // Remove anything that looks like a tag.
            text = Regex.Replace(text, @"<(.[^>]*)>", "");
            // Remove a line break together with the whitespace that follows it.
            text = Regex.Replace(text, @"([\r\n])[\s]+", "");
            // Remove what is left of comments: the terminator, then the opener up to the end of its line.
            text = text.Replace("-->", "");
            text = Regex.Replace(text, @"<!--.*", "");
            text = DecodeEntities(text);
            text = RemoveMarkupResidue(text);

            // HttpUtility.HtmlEncode is static, so no current HttpContext is required.
            return HttpUtility.HtmlEncode(text).Trim();
        }

        /// <summary>
        /// Removes scripts, tags and comments from an HTML fragment and decodes the
        /// supported entities. Unlike NoHTML, the result is not HTML-encoded.
        /// </summary>
        /// <param name="strHtml">HTML fragment to clean; may be null or empty.</param>
        /// <returns>The plain text, or an empty string for null or empty input.</returns>
        public static string StripHTML(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            string text = ScriptBlock.Replace(strHtml, "");
            text = ElementTag.Replace(text, "");
            text = Regex.Replace(text, @"([\r\n])[\s]+", "");
            text = DecodeEntities(text);
            // Turning the comment terminator into a line break lets the next pattern
            // (which ends at a line feed) consume the whole comment.
            text = text.Replace("-->", "\r\n");
            text = Regex.Replace(text, @"<!--.*\n", "");
            return RemoveMarkupResidue(text);
        }

        /// <summary>
        /// Removes every angle-bracket delimited tag from the text.
        /// </summary>
        /// <param name="HTMLStr">HTML fragment; may be null or empty.</param>
        /// <returns>The text without tags, or an empty string for null or empty input.</returns>
        public static string ParseTags(string HTMLStr)
        {
            if (string.IsNullOrEmpty(HTMLStr))
            {
                return string.Empty;
            }

            return Regex.Replace(HTMLStr, "<[^>]*>", "");
        }

        /// <summary>
        /// Returns the src address of the first img tag in the text.
        /// </summary>
        /// <param name="HTMLStr">HTML fragment; may be null or empty.</param>
        /// <returns>
        /// The address exactly as written (original casing, without surrounding quotes),
        /// or an empty string when no img tag with a src attribute is found.
        /// </returns>
        public static string GetImgUrl(string HTMLStr)
        {
            if (string.IsNullOrEmpty(HTMLStr))
            {
                return string.Empty;
            }

            // Match case-insensitively instead of lower-casing the input, which
            // would corrupt case-sensitive URLs.
            Match match = ImageSource.Match(HTMLStr);
            if (!match.Success)
            {
                return string.Empty;
            }

            return match.Groups["url"].Value;
        }

        // Decodes the supported entities in a single pass so that already-decoded
        // text is never decoded a second time (e.g. "&amp;lt;" becomes "&lt;").
        private static string DecodeEntities(string text)
        {
            return Entity.Replace(text, new MatchEvaluator(ReplaceEntity));
        }

        // Maps one matched entity to its replacement text.
        private static string ReplaceEntity(Match match)
        {
            switch (match.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return "   ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Every other decimal character reference is dropped.
                    return string.Empty;
            }
        }

        // Strips leftover angle brackets and CR/LF pairs. string.Replace returns a
        // new string, so the result has to be used rather than discarded.
        private static string RemoveMarkupResidue(string text)
        {
            return text.Replace("<", "").Replace(">", "").Replace("\r\n", "");
        }

    }
    测试用例:

    /// <summary>
    /// Demo page handler: runs a sample HTML fragment through FiterHtml.NoHTML
    /// and writes the resulting text to the response.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Quotes inside the literal must be escaped with a backslash.
        string test = "例:<strong   id=\"abc\">你好</strong><p><a href=\"http://green.njut.edu.cn/images/gslogo.png\" target=\"_blank\">绿荫网</a></p><IMG title=\"绿荫网\" src=\"http://121.9.206.74/Gift/face/2.gif\" border=0>H<EM>i,星烛网</EM>";

        // NoHTML is static, so no FiterHtml instance is needed.
        Response.Write(FiterHtml.NoHTML(test));
    }



  • 相关阅读:
    剑指office--------合并两个排序的链表
    剑指office--------栈的压入、弹出序列
    剑指office--------二叉树中和为某一值的路径
    剑指office--------翻转单词顺序列
    剑指office--------丑数
    剑指office--------机器人的运动范围
    剑指office--------二叉树的下一个结点
    数论------欧拉函数
    hdu 5831 Rikka with Parenthesis II
    hdu 5821 Ball (贪心)
  • 原文地址:https://www.cnblogs.com/ituff/p/2858549.html
Copyright © 2011-2022 走看看