zoukankan      html  css  js  c++  java
  • HtmlEntities

    #region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode
            /// <summary>
            /// Reduces an HTML fragment to plain text: source line breaks are
            /// flattened, paragraph ends and <c>&lt;br&gt;</c> tags become line
            /// breaks, comments, <c>style</c>/<c>script</c> elements and all
            /// remaining tags are removed, a fixed set of entities is decoded and
            /// runs of spaces are collapsed.
            /// Based on
            /// http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&amp;zep=Control%2FHtmlEditControl.cs&amp;obid=43954&amp;obtid=2&amp;ovid=13
            /// </summary>
            /// <param name="htmlCode">The HTML markup to convert.</param>
            /// <returns>
            /// The text content of <paramref name="htmlCode"/>; an empty string
            /// when the input is <c>null</c> or empty.
            /// </returns>
            private static string getOnlyTextFromHtmlCode(string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode))
                {
                    return string.Empty;
                }

                const RegexOptions options =
                    RegexOptions.Singleline | RegexOptions.IgnoreCase;

                // Line breaks in the markup itself carry no meaning in HTML;
                // flatten them first so that only </p> and <br> produce breaks.
                // "\r\n" must be handled before the single characters so a
                // Windows line ending yields one space, not two.
                htmlCode = htmlCode.Replace("\r\n", @" ");
                htmlCode = htmlCode.Replace("\r", @" ");
                htmlCode = htmlCode.Replace("\n", @" ");

                // End of paragraph -> blank line (any casing, optional
                // whitespace before the closing bracket).
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"</p\s*>",
                    Environment.NewLine + Environment.NewLine,
                    options);

                // HTML comments
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<!--.*?-->",
                    string.Empty,
                    options);

                // <br>, <br/>, <br ...> -> line break
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<br[^>]*>",
                    Environment.NewLine,
                    options);

                // Elements whose content is not text: drop them including
                // everything between the opening and closing tag.
                htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
                htmlCode = removeTagFromHtmlCode(@"script", htmlCode);

                // All remaining tags (their inner text is kept).
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<(.|\n)+?>",
                    string.Empty,
                    options);

                // Entities (umlauts and a few symbols)
                htmlCode = unescapeHtmlEntities(htmlCode);

                // Collapse runs of spaces into a single space.
                htmlCode = Regex.Replace(
                    htmlCode,
                    @" +",
                    @" ",
                    options);

                return htmlCode;
            }
            /// <summary>
            /// Replaces a fixed set of named HTML character references with the
            /// characters they stand for. Entity names are case-sensitive
            /// (<c>&amp;Auml;</c> is "Ä", <c>&amp;auml;</c> is "ä"). References
            /// that are not in the table below are left untouched.
            /// See http://dev.w3.org/html5/html-author/charref
            /// </summary>
            /// <param name="htmlCode">Text that may contain named entities.</param>
            /// <returns>
            /// The text with the known entities decoded; the input itself when
            /// it is <c>null</c> or empty.
            /// </returns>
            private static string unescapeHtmlEntities(string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode))
                {
                    return htmlCode;
                }

                // { entity, replacement } pairs. The non-breaking space is
                // deliberately mapped to an ordinary space because the result
                // is meant to be plain text.
                string[,] entities =
                {
                    { @"&nbsp;", @" " },

                    // German umlauts and sharp s
                    { @"&Auml;", @"Ä" },
                    { @"&auml;", @"ä" },
                    { @"&ouml;", @"ö" },
                    { @"&Ouml;", @"Ö" },
                    { @"&uuml;", @"ü" },
                    { @"&Uuml;", @"Ü" },
                    { @"&szlig;", @"ß" },

                    // Symbols
                    { @"&pound;", @"£" },
                    { @"&sect;", @"§" },
                    { @"&copy;", @"©" },
                    { @"&reg;", @"®" },
                    { @"&micro;", @"µ" },
                    { @"&para;", @"¶" },
                    { @"&Oslash;", @"Ø" },
                    { @"&oslash;", @"ø" },
                    { @"&divide;", @"÷" },
                    { @"&times;", @"×" },
                };

                for (int i = 0; i < entities.GetLength(0); i++)
                {
                    htmlCode = htmlCode.Replace(entities[i, 0], entities[i, 1]);
                }

                return htmlCode;
            }
    
            /// <summary>
            /// Removes every element with the given tag name from the markup,
            /// including its attributes and everything between the opening and
            /// the closing tag. Matching is case-insensitive and spans lines.
            /// </summary>
            /// <param name="tag">Element name without brackets, e.g. "script".</param>
            /// <param name="htmlCode">The markup to clean.</param>
            /// <returns>
            /// The markup without the matched elements; the input itself when
            /// it is <c>null</c> or empty.
            /// </returns>
            private static string removeTagFromHtmlCode(
                string tag,
                string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode) || string.IsNullOrEmpty(tag))
                {
                    return htmlCode;
                }

                // Escape the name so it is always matched literally.
                string name = Regex.Escape(tag);

                // The look-ahead makes sure the tag name ends right after
                // "<name" (whitespace, "/" or ">" follows); without it "<b"
                // would also match "<br>" or "<body>". The closing tag may
                // contain whitespace before ">".
                string pattern =
                    @"<" + name + @"(?=[\s/>]).*?</" + name + @"\s*>";

                return Regex.Replace(
                    htmlCode,
                    pattern,
                    string.Empty,
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);
            }
            #endregion
    

      

  • 相关阅读:
    Token_使用JWT生成token
    JSON Web Tokens(JWT)
    Hadoop 2.0 中的资源管理框架
    Hadoop 1.0 和 2.0 中的数据处理框架
    Hadoop 概述
    探索 OpenStack 之(17):计量模块 Ceilometer 中的数据收集机制
    探索 OpenStack 之(16):计量模块 Ceilometer 介绍及优化
    VMware + OpenStack: 从 Plugin 到 VIO (VMware Integrated OpenStack)的演进
    探索 OpenStack 之(15):oslo.messaging 和 Cinder 中 MessageQueue 消息的发送和接收
    探索 OpenStack 之(14):OpenStack 中 RabbitMQ 的使用
  • 原文地址:https://www.cnblogs.com/geovindu/p/4310328.html
Copyright © 2011-2022 走看看