zoukankan      html  css  js  c++  java
  • Asp.Net MVC页面静态化功能实现二:用递归算法来实现

    上一篇提到采用IHttpModule来实现:当用户访问网站的时候,通过重新定义Response.Filter将返回给客户端的html代码保存下来,以便用户下一次访问时直接访问静态页面。

    Asp.Net MVC页面静态化功能实现一:利用IHttpModule,摒弃ResultFilter

    后来想到可以通过WebRequest获取html代码,然后采用递归算法来实现。基本实现思路如下:

    通过WebRequest获取超链接地址返回的html代码,并保存;然后用正则表达式匹配html代码中所有超链接href=""里面的地址信息;循环这些超链接地址,再递归地通过WebRequest获取html代码。

    实现代码如下:

    /// <summary>
    /// Generates static HTML copies of a site's pages: downloads a page, writes it
    /// to disk, extracts the links found in its markup and repeats the process
    /// recursively for every link that has not been saved yet.
    /// </summary>
    public class HtmlPageHelper
    {
        /// <summary>
        /// Matches <c>href="..."</c> / <c>href='...'</c> attributes; group 1 holds the link target.
        /// Targets containing '.', '#' or ':' do not match because the character class excludes them.
        /// </summary>
        private static readonly Regex HrefRegex = new Regex(
            @"href\s*=\s*(?:[""'](?<1>[^""'.#:]*)[""'])",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// URLs that have already been written to disk; used to stop the recursion on link cycles.
        /// </summary>
        private readonly System.Collections.Generic.HashSet<string> htmlCreatedList =
            new System.Collections.Generic.HashSet<string>();

        /// <summary>
        /// Recursively saves the page at <paramref name="urlString"/> and every page it links to.
        /// </summary>
        /// <param name="urlString">Absolute URL of the page to fetch and save.</param>
        /// <exception cref="ArgumentNullException"><paramref name="urlString"/> is null.</exception>
        /// <remarks>
        /// Recursion depth grows with the length of the link chain being followed.
        /// Exceptions raised while fetching or writing a page are not caught here and abort the crawl.
        /// </remarks>
        public void SaveHtmlCode(string urlString)
        {
            if (urlString == null)
            {
                throw new ArgumentNullException("urlString");
            }

            if (htmlCreatedList.Contains(urlString))
            {
                return;
            }

            string htmlCode = GetHtmlCodeFromUrl(urlString);
            string htmlPath = GetHtmlPathFromUrl(urlString);

            // CreateDirectory is a no-op when the directory already exists.
            string direcHtmlPath = Path.GetDirectoryName(htmlPath);
            if (!string.IsNullOrEmpty(direcHtmlPath))
            {
                Directory.CreateDirectory(direcHtmlPath);
            }

            File.WriteAllText(htmlPath, htmlCode);

            // Mark as done before following links so that pages linking back here are skipped.
            htmlCreatedList.Add(urlString);

            foreach (string link in GetUrlLinkFromHtmlCode(htmlCode))
            {
                // Links are site-relative; normalise separators and prepend the configured prefix.
                string target = link.Replace(@"\", "/");
                SaveHtmlCode(WebConfigInfo.UrlPrefix + target);
            }
        }

        /// <summary>
        /// Downloads the HTML of a page with an HTTP GET request.
        /// </summary>
        /// <param name="urlString">URL of the page to download.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        private string GetHtmlCodeFromUrl(string urlString)
        {
            HttpWebRequest hwRequest = (HttpWebRequest)WebRequest.Create(urlString);

            // The property sets the header value only, so the header name must not be part of it.
            hwRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
            hwRequest.Accept = "*/*";
            hwRequest.KeepAlive = true;
            hwRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

            // using blocks release the response, stream and reader even when reading throws.
            using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
            using (Stream streamResponse = hwResponse.GetResponseStream())
            using (StreamReader readerOfStream = new StreamReader(streamResponse, System.Text.Encoding.UTF8))
            {
                return readerOfStream.ReadToEnd();
            }
        }

        /// <summary>
        /// Maps a page URL to the physical path of its static copy under the application's "Html" folder.
        /// </summary>
        /// <param name="urlString">Absolute URL of the page.</param>
        /// <returns>
        /// "&lt;app root&gt;Html&lt;url path&gt;\Index.html" when the URL has no query string; otherwise the
        /// query keys and values are concatenated and used as the file name, followed by ".html".
        /// </returns>
        private string GetHtmlPathFromUrl(string urlString)
        {
            Uri uri = new Uri(urlString);
            string filePath = HttpContext.Current.Request.PhysicalApplicationPath + "Html" + uri.AbsolutePath + @"\";

            // "?a=1&b=2" becomes "a1b2".
            string[] querys = uri.Query.Split(new char[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);
            filePath += string.Concat(querys);
            filePath += querys.Length == 0 ? "Index.html" : ".html";

            // Convert URL separators to backslashes, then collapse the doubled
            // backslashes produced by joining the path segments above.
            filePath = filePath.Replace("/", @"\");
            filePath = filePath.Replace(@"\\", @"\");
            return filePath;
        }

        /// <summary>
        /// Extracts the distinct link targets of all matching href attributes in the given markup.
        /// </summary>
        /// <param name="htmlCode">HTML markup to scan.</param>
        /// <returns>
        /// The href values in order of first appearance, without the attribute name or quotes and
        /// with every "amp;" removed (so "&amp;amp;" becomes "&amp;").
        /// </returns>
        private ArrayList GetUrlLinkFromHtmlCode(string htmlCode)
        {
            ArrayList aList = new ArrayList();
            foreach (Match match in HrefRegex.Matches(htmlCode))
            {
                // Use the captured target rather than the whole match, so the result does not
                // depend on the attribute's letter case or on which quote character was used.
                string strNew = match.Groups[1].Value.Replace("amp;", "");
                if (!aList.Contains(strNew))
                {
                    aList.Add(strNew);
                }
            }
            return aList;
        }
    }
  • 相关阅读:
    linux command ubuntu
    C++ project
    windows command line
    vi command
    C++ Language
    postgresql backup
    C Language
    L1-046. 整除光棍(模拟除法)
    pta 拯救007(Floyd)
    CCF 201609-4 交通规划
  • 原文地址:https://www.cnblogs.com/tracine0513/p/4936735.html
Copyright © 2011-2022 走看看