/// <summary>
/// Produces a plain-text summary of an HTML fragment, limited to a maximum number of characters.
/// </summary>
/// <param name="html">Markup to summarise.</param>
/// <param name="summaryLength">Maximum number of characters to keep from the cleaned text.</param>
/// <returns>
/// The cleaned text as-is when it is no longer than <paramref name="summaryLength"/>;
/// otherwise its first <paramref name="summaryLength"/> characters.
/// </returns>
public static string GetSummary(string html, int summaryLength)
{
    // Pipeline: drop tags (GetHtmlText), trim (DelHTML), then pass through Strtitle (defined elsewhere).
    string cleaned = Strtitle(DelHTML(GetHtmlText(html)));

    bool needsCut = cleaned.Length > summaryLength;
    return needsCut ? cleaned.Substring(0, summaryLength) : cleaned;
}
/// <summary>
/// Deletes every tag-like run from the given markup and returns what is left.
/// </summary>
/// <param name="html">Markup to strip.</param>
/// <returns>The input with each "&lt;...&gt;" run removed.</returns>
public static string GetHtmlText(string html)
{
    // A tag here is '<', any run of characters other than '>', then the closing '>'.
    const string tagPattern = @"<[^>]*>";
    string withoutTags = System.Text.RegularExpressions.Regex.Replace(html, tagPattern, string.Empty);
    return withoutTags;
}
/// <summary>
/// Returns the input with leading and trailing white space removed.
/// </summary>
/// <remarks>
/// The original note on this method reads "remove the HTML", but the body only trims;
/// no markup is removed here.
/// </remarks>
/// <param name="Htmlstring">Text to trim.</param>
/// <returns>The trimmed text.</returns>
public static string DelHTML(string Htmlstring)
{
    string trimmed = Htmlstring.Trim();
    return trimmed;
}
/// <summary>
/// Removes a fixed set of (case-sensitive) HTML tags, the Office XML namespace declaration,
/// spaces and right double quotation marks from a string.
/// </summary>
/// <param name="strtitle">Text that may contain markup. Returned unchanged when null or empty.</param>
/// <returns>The text after every removal step has been applied.</returns>
public static string DelWords(string strtitle)
{
    if (string.IsNullOrEmpty(strtitle))
    {
        return strtitle;
    }

    // Remove the namespace declaration first: the literal contains spaces, so it could never
    // match once the space-stripping step below has run.
    string strContent = strtitle.Replace("<?xml:namespace prefix = o ns = 'urn:schemas-microsoft-com:office:office'/>", "");

    // Heading / span / font / <o:p> tags. The previous pattern contained an empty alternative ("||");
    // because alternatives are tried left to right, that empty branch always won and the
    // <o:p></o:p> branch after it was unreachable. Presumably "</FONT>" was meant to stand there
    // (every other tag in the list has its closing counterpart) - TODO confirm.
    strContent = Regex.Replace(strContent, "(<H1.*?>|</H1>|</br>|<SPAN.*?>|</SPAN>|<FONT.*?>|</FONT>|<o:p></o:p>)", "");
    strContent = Regex.Replace(strContent, "(<IMG.*?>)", "");
    // "<P.*?>" also matches a bare "<P>", so no separate "<P>" pass is needed.
    strContent = Regex.Replace(strContent, "(<P.*?>)", "");
    strContent = Regex.Replace(strContent, "( )", "");
    strContent = Regex.Replace(strContent, "(”)", "");
    strContent = Regex.Replace(strContent, "(</P>)", "");
    strContent = Regex.Replace(strContent, "(</BR>)", "");
    strContent = Regex.Replace(strContent, "(<BR>)", "");

    // Return the fully processed text (the parameter itself was returned before, which
    // threw away everything after the first replacement).
    return strContent;
}
// NOTE(review): this brace block has no method signature in front of it, so it is not valid
// at member level. Its statements are the same as the body of GetSummary(string html, int summaryLength);
// presumably the signature line was lost when the code was duplicated - confirm, then restore
// the header or delete this copy.
{
// Strip tags, trim, then pass the text through Strtitle (defined elsewhere).
string text = GetHtmlText(html);
text = DelHTML(text);
text = Strtitle(text);
// Text no longer than the limit is returned whole.
if (summaryLength >= text.Length)
return text;
// Otherwise keep only the first summaryLength characters; the trailing + "" appends nothing.
return text.Substring(0, summaryLength)+"";
}
/// <summary>
/// Deletes every tag-like run from the given markup and returns what is left.
/// </summary>
/// <param name="html">Markup to strip.</param>
/// <returns>The input with each "&lt;...&gt;" run removed.</returns>
public static string GetHtmlText(string html)
{
    // A tag here is '<', any run of characters other than '>', then the closing '>'.
    const string tagPattern = @"<[^>]*>";
    string withoutTags = System.Text.RegularExpressions.Regex.Replace(html, tagPattern, string.Empty);
    return withoutTags;
}
/// <summary>
/// Returns the input with leading and trailing white space removed.
/// </summary>
/// <remarks>
/// The original note on this method reads "remove the HTML", but the body only trims;
/// no markup is removed here.
/// </remarks>
/// <param name="Htmlstring">Text to trim.</param>
/// <returns>The trimmed text.</returns>
public static string DelHTML(string Htmlstring)
{
    string trimmed = Htmlstring.Trim();
    return trimmed;
}
/// <summary>
/// Removes a fixed set of (case-sensitive) HTML tags, the Office XML namespace declaration,
/// spaces and right double quotation marks from a string.
/// </summary>
/// <param name="strtitle">Text that may contain markup. Returned unchanged when null or empty.</param>
/// <returns>The text after every removal step has been applied.</returns>
public static string DelWords(string strtitle)
{
    if (string.IsNullOrEmpty(strtitle))
    {
        return strtitle;
    }

    // Remove the namespace declaration first: the literal contains spaces, so it could never
    // match once the space-stripping step below has run.
    string strContent = strtitle.Replace("<?xml:namespace prefix = o ns = 'urn:schemas-microsoft-com:office:office'/>", "");

    // Heading / span / font / <o:p> tags. The previous pattern contained an empty alternative ("||");
    // because alternatives are tried left to right, that empty branch always won and the
    // <o:p></o:p> branch after it was unreachable. Presumably "</FONT>" was meant to stand there
    // (every other tag in the list has its closing counterpart) - TODO confirm.
    strContent = Regex.Replace(strContent, "(<H1.*?>|</H1>|</br>|<SPAN.*?>|</SPAN>|<FONT.*?>|</FONT>|<o:p></o:p>)", "");
    strContent = Regex.Replace(strContent, "(<IMG.*?>)", "");
    // "<P.*?>" also matches a bare "<P>", so no separate "<P>" pass is needed.
    strContent = Regex.Replace(strContent, "(<P.*?>)", "");
    strContent = Regex.Replace(strContent, "( )", "");
    strContent = Regex.Replace(strContent, "(”)", "");
    strContent = Regex.Replace(strContent, "(</P>)", "");
    strContent = Regex.Replace(strContent, "(</BR>)", "");
    strContent = Regex.Replace(strContent, "(<BR>)", "");

    // Return the fully processed text (the parameter itself was returned before, which
    // threw away everything after the first replacement).
    return strContent;
}