上一篇提到采用IHttpModule,在用户访问网站时通过重新定义Response.Filter,将返回给客户端的html代码保存下来,以便用户下一次访问时直接访问静态页面。
Asp.Net MVC页面静态化功能实现一:利用IHttpModule,摒弃ResultFilter
后来想到可以通过WebRequest获取html代码,然后采用递归算法来实现。基本实现思路如下:
通过WebRequest获取超链接地址返回的html代码,并保存;然后用正则表达式匹配html代码中所有超链接href=""里面的地址信息;循环这些超链接地址,再递归地通过WebRequest获取html代码。
实现代码如下:
/// <summary>
/// Generates static HTML copies of a site: starting from one URL, it downloads the page,
/// writes it to disk, extracts the page's hyperlinks and repeats the process for each link.
/// </summary>
/// <remarks>
/// An instance remembers every URL it has already saved, so calling
/// <see cref="SaveHtmlCode"/> again with a URL that was saved earlier is a no-op.
/// </remarks>
public class HtmlPageHelper
{
    // Matches href="..." / href='...' (any letter case). Group 1 captures the link target.
    // The character class rejects targets containing a quote, '.', '#' or ':', so links
    // with a file extension, a fragment or a scheme (http:, javascript:, ...) never match.
    private static readonly Regex HrefRegex = new Regex(
        "href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])",
        RegexOptions.IgnoreCase);

    // URLs whose HTML has already been written to disk by this instance.
    private readonly System.Collections.Generic.HashSet<string> htmlCreatedList =
        new System.Collections.Generic.HashSet<string>();

    /// <summary>
    /// Saves the page at <paramref name="urlString"/> as a static HTML file and then does the
    /// same for every page reachable through the hyperlinks found in the downloaded HTML.
    /// </summary>
    /// <param name="urlString">Absolute URL of the page to start from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="urlString"/> is null.</exception>
    /// <remarks>
    /// Exceptions raised while downloading or writing a page (for example a
    /// <see cref="WebException"/>) are not caught here; they stop the crawl and
    /// propagate to the caller.
    /// </remarks>
    public void SaveHtmlCode(string urlString)
    {
        if (urlString == null)
        {
            throw new ArgumentNullException("urlString");
        }

        // An explicit stack replaces recursion so the crawl depth is not limited by the
        // call stack. Links are pushed in reverse so pages are visited in the same
        // depth-first order a recursive implementation would use.
        var pending = new System.Collections.Generic.Stack<string>();
        pending.Push(urlString);

        while (pending.Count > 0)
        {
            string currentUrl = pending.Pop();
            if (htmlCreatedList.Contains(currentUrl))
            {
                continue;
            }

            string htmlCode = GetHtmlCodeFromUrl(currentUrl);
            string htmlPath = GetHtmlPathFromUrl(currentUrl);

            string directoryPath = Path.GetDirectoryName(htmlPath);
            if (!string.IsNullOrEmpty(directoryPath))
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(directoryPath);
            }

            File.WriteAllText(htmlPath, htmlCode);

            // Record the URL only after the file was written, so a failed page is retried
            // if the caller invokes this method again.
            htmlCreatedList.Add(currentUrl);

            System.Collections.Generic.List<string> links = GetUrlLinkFromHtmlCode(htmlCode);
            for (int i = links.Count - 1; i >= 0; i--)
            {
                string link = links[i].Replace("\\", "/");
                pending.Push(WebConfigInfo.UrlPrefix + link);
            }
        }
    }

    /// <summary>
    /// Downloads the HTML of a page with an HTTP request.
    /// </summary>
    /// <param name="urlString">URL of the page to download.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    private string GetHtmlCodeFromUrl(string urlString)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlString);

        // The property value is the header *value* only; it must not repeat the
        // "User-Agent:" header name.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
        request.Accept = "*/*";
        request.KeepAlive = true;
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        // using-blocks release the connection even when reading the body throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Maps a page URL to the physical path of its static HTML file.
    /// </summary>
    /// <param name="urlString">Absolute URL of the page.</param>
    /// <returns>
    /// A path of the form <c>{application root}Html\{url path}\{file}</c>, where the file is
    /// <c>Index.html</c> when the URL has no query string and otherwise the query string's
    /// names and values concatenated, followed by <c>.html</c>.
    /// </returns>
    private string GetHtmlPathFromUrl(string urlString)
    {
        Uri uri = new Uri(urlString);

        // "?id=5&page=2" -> { "id", "5", "page", "2" }
        string[] queryParts = uri.Query.Split(
            new char[] { '?', '&', '=' },
            StringSplitOptions.RemoveEmptyEntries);

        string fileName = queryParts.Length == 0
            ? "Index.html"
            : string.Concat(queryParts) + ".html";

        string filePath = HttpContext.Current.Request.PhysicalApplicationPath
            + "Html" + uri.AbsolutePath + "\\" + fileName;

        // Normalise to backslashes, then collapse the doubled separators produced by
        // joining segments that already end or start with one.
        filePath = filePath.Replace("/", "\\");
        filePath = filePath.Replace("\\\\", "\\");
        return filePath;
    }

    /// <summary>
    /// Extracts the distinct hyperlink targets from a piece of HTML.
    /// </summary>
    /// <param name="htmlCode">HTML to scan.</param>
    /// <returns>
    /// The href values (without the <c>href=</c> prefix and surrounding quotes) in order of
    /// first appearance, with <c>&amp;amp;</c> decoded to <c>&amp;</c>.
    /// </returns>
    private System.Collections.Generic.List<string> GetUrlLinkFromHtmlCode(string htmlCode)
    {
        var links = new System.Collections.Generic.List<string>();
        var seen = new System.Collections.Generic.HashSet<string>();

        foreach (Match match in HrefRegex.Matches(htmlCode))
        {
            // Reading the capture group drops "href=" and the quotes regardless of letter
            // case or quote style.
            string link = match.Groups[1].Value.Replace("&amp;", "&");
            if (seen.Add(link))
            {
                links.Add(link);
            }
        }

        return links;
    }
}