zoukankan
html css js c++ java
asp.net如何去掉HTML标记
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script blocks, complete comments, tags, line breaks followed by
/// whitespace, leftover comment delimiters, a fixed set of character entities, any other
/// numeric entity (dropped), and finally <c>&amp;amp;</c>. Stray angle brackets and CR/LF
/// pairs that survive are removed before the text is HTML-encoded.
/// </remarks>
/// <param name="Htmlstring">Source text that contains HTML. <c>null</c> or empty yields an empty string.</param>
/// <returns>The text with markup removed, HTML-encoded and trimmed.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    const RegexOptions ignoreCase = RegexOptions.IgnoreCase;
    // Singleline lets '.' cross line breaks so multi-line scripts/comments are matched as a whole.
    const RegexOptions spanLines = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // Remove script blocks together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", spanLines);

    // Remove complete comments first; otherwise a '>' inside a comment makes the tag
    // pattern below stop early and leak the rest of the comment into the output.
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", spanLines);

    // Remove HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", ignoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", ignoreCase);

    // Clean up leftovers of unterminated or unopened comments.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", ignoreCase);

    // Decode the supported character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", ignoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", ignoreCase);

    // Drop every other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", ignoreCase);

    // Decode '&amp;' last so that text such as "&amp;lt;" is decoded exactly once
    // ("&lt;") instead of being decoded a second time by the patterns above.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", ignoreCase);

    // string.Replace returns a new string; the result has to be assigned back.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // NOTE(review): HttpContext.Current is presumably null outside an ASP.NET request,
    // in which case this line would throw - confirm how callers use this method.
    Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
/**/
/// C# function that extracts the text from HTML code
/// <summary>
/// Removes HTML markup.
/// </summary>
/// <param name="strHtml">Source text that contains HTML.</param>
/// <returns>The text with the markup removed.</returns>
using
System;
using
System.Text.RegularExpressions;
/// <summary>
/// Console demo that strips HTML markup from a sample string using a fixed list of
/// regular-expression replacements.
/// </summary>
public class StripHTMLTest
{
    /// <summary>
    /// Patterns applied in order by <see cref="StripHTML"/>; entry <c>i</c> is replaced
    /// with <see cref="Replacements"/>[i]. Built once instead of on every call.
    /// </summary>
    private static readonly Regex[] Patterns =
    {
        // Script blocks; Singleline lets '.' cross line breaks so multi-line scripts are removed whole.
        new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline),
        // Opening/closing tags including attributes.
        // NOTE(review): nested quantifiers - may backtrack heavily on malformed input; confirm before using on untrusted HTML.
        new Regex(@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", RegexOptions.IgnoreCase),
        // A line break together with the whitespace that follows it.
        new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),
        new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
        new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
        new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
        // Any other numeric entity is dropped.
        new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),
        // '&amp;' is decoded after all other entities so "&amp;lt;" is decoded exactly once.
        new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
        new Regex(@"-->", RegexOptions.IgnoreCase),
        new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)
    };

    /// <summary>Replacement text for the pattern at the same index in <see cref="Patterns"/>.</summary>
    private static readonly string[] Replacements =
    {
        "",
        "",
        "",
        "\"",
        "<",
        ">",
        " ",
        "\u00A1", // chr(161)
        "\u00A2", // chr(162)
        "\u00A3", // chr(163)
        "\u00A9", // chr(169)
        "",
        "&",
        "\r\n",
        ""
    };

    /// <summary>
    /// Runs <see cref="StripHTML"/> on a sample document and prints the result.
    /// </summary>
    public static void Main()
    {
        string s = StripHTML("<HTML><HEAD><TITLE>中国石龙信息平台</TITLE></HEAD><BODY>faddfs龙信息平台</BODY></HTML>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes HTML markup from <paramref name="strHtml"/> and decodes a fixed set of
    /// character entities.
    /// </summary>
    /// <param name="strHtml">Source text that contains HTML. <c>null</c> or empty yields an empty string.</param>
    /// <returns>The text with markup, stray angle brackets and CR/LF pairs removed.</returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string strOutput = strHtml;
        for (int i = 0; i < Patterns.Length; i++)
        {
            strOutput = Patterns[i].Replace(strOutput, Replacements[i]);
        }

        // string.Replace returns a new string; the result has to be assigned back.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");
        return strOutput;
    }
}
写一个静态方法
移除HTML标签
#region
移除HTML标签
/// <summary>
/// Removes HTML tags from the given text.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags.</param>
/// <returns>The input with every "&lt;...&gt;" span deleted.</returns>
public static string ParseTags(string HTMLStr)
{
    // A tag is '<', any run of characters other than '>', then '>'.
    const string tagPattern = "<[^>]*>";
    System.Text.RegularExpressions.Regex tagMatcher =
        new System.Text.RegularExpressions.Regex(tagPattern);
    string withoutTags = tagMatcher.Replace(HTMLStr, "");
    return withoutTags;
}
#endregion
取出文本中的图片地址
#region
取出文本中的图片地址
/// <summary>
/// Matches the first &lt;img&gt; tag that has a <c>src</c> attribute and captures the attribute
/// value in the <c>url</c> group. The value may be double-quoted, single-quoted or unquoted;
/// the quotes are not part of the capture. Built once and matched case-insensitively.
/// </summary>
private static readonly Regex ImgSrcRegex = new Regex(
    @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Extracts the image address of the first &lt;img&gt; tag found in the text.
/// </summary>
/// <param name="HTMLStr">Text that may contain an &lt;img&gt; tag. <c>null</c> or empty yields an empty string.</param>
/// <returns>
/// The value of the first <c>src</c> attribute with its original letter case and without
/// surrounding quotes, or an empty string when no image tag is found.
/// </returns>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // Match against the original text (not a lower-cased copy): URL paths can be case-sensitive.
    Match m = ImgSrcRegex.Match(HTMLStr);
    return m.Success ? m.Groups["url"].Value : string.Empty;
}
#endregion
查看全文
相关阅读:
SSIS ->> Excel Destination无法接受大于255个字符长度的字符字段(转载)
在express站点中使用ejs模板引擎
Kali Linux系列教程之OpenVas安装
Kali Linux Web 渗透测试视频教程— 第二课 google hack 实战
Kali Linux Web 渗透测试视频教程— 第四课 google hack 实战
google hack 之 查询语法
shellKali Linux Web 渗透测试— 初级教程(第三课)
NODE-WEBKIT教程(12)全屏
node-webkit教程(11)Platform Service之shell
node-webkit教程(10)Platform Service之File dialogs
原文地址:https://www.cnblogs.com/goody9807/p/961195.html
最新文章
对于DQN的三大改进
GAN与NLP的讨论
JavaScript 为什么要有 Symbol 类型?
Web 性能优化:21 种优化 CSS 和加快网站速度的方法
深入理解JavaScript作用域和作用域链
WEB 实时推送技术的总结
教你快速撸一个免费HTTPS证书
Fundebug前端JavaScript插件更新至1.7.1,拆分录屏代码,还原部分Script error.
使用这些 HTTP 头保护 Web 应用
JavaScript 的 4 种数组遍历方法: for VS forEach() VS for/in VS for/of
热门文章
一文读懂HTTP/2及HTTP/3特性
在.NET Framework中慎用DirectoryInfo.GetFiles方法
EF Core中,通过实体类向SQL Server数据库表中插入数据后,实体对象是如何得到数据库表中的默认值的
SQL Server中的Merge关键字(转载)
Sql Server插入数据并返回自增ID,@@IDENTITY,SCOPE_IDENTITY和IDENT_CURRENT的区别(转载)
SQL Server如何查看当前数据库连接的SPID
SQL Server 中用DBCC Opentran语句查看未关闭的事务(转载)
IIS导入.pfx证书时报错:"A specified logon session does not exist. It may already have been terminated."
SQL Server 索引中include的魅力(具有包含性列的索引)(转载)
C#中用OLEDB操作EXCEL时,单元格内容长度超过255被截断
Copyright © 2011-2022 走看看