在一些需要经常更新页面数据的网站中，访问量不大时，一般直接发布带后台代码的动态页面，每次访问都要与数据库交互。但是一旦访问量增加，服务器开销和成本就必须考虑进来了。以文章为例：后台编辑后，文章内容存入数据库；如果有1000人访问，而每次都去读取数据库，这1000次IO访问的开销就比较大了。一个好的方法是：文章确定之后，把它做成静态页面，而这件事交给程序来完成——递归遍历整个网站，将网站内容都访问一遍，生成这些页面的静态文本页面，再将这些页面发布。这样对浏览者而言，他看到的还是同一个地址、同一份文章，只是这份是静态的而已。这样就提升了网站的效率，节约了资源。
下面附上一份用 C# 递归遍历网站内容、并为每个页面生成静态 HTML 文件的代码：
/// <summary>
/// URLs whose static copy has already been written during this crawl.
/// <see cref="SaveHtmlCode"/> consults it so that pages linking to each other are saved only once.
/// </summary>
private readonly HashSet<string> htmlCreatedList = new HashSet<string>();

/// <summary>
/// Recursively turns a page, and every page reachable through its links, into static HTML files.
/// </summary>
/// <remarks>
/// The page is downloaded with GetHtmlCodeFromUrl and written to the path returned by
/// ToPhysicalPath. Every link reported by GetUrlLinkFromHtmlCode is then prefixed with
/// WebConfigInfo.UrlPrefix and processed the same way. A URL that was already saved is skipped.
/// </remarks>
/// <param name="urlString">Absolute URL of the page to save.</param>
public void SaveHtmlCode(string urlString)
{
    if (htmlCreatedList.Contains(urlString))
    {
        return;
    }

    string htmlCode = GetHtmlCodeFromUrl(urlString);
    string htmlPath = urlString.ToPhysicalPath();

    string directory = Path.GetDirectoryName(htmlPath);
    if (!string.IsNullOrEmpty(directory))
    {
        // CreateDirectory does nothing when the directory already exists,
        // so no separate Directory.Exists check is needed.
        Directory.CreateDirectory(directory);
    }

    File.WriteAllText(htmlPath, htmlCode);

    // Record the URL before following its links so that a page linking back here is not saved again.
    htmlCreatedList.Add(urlString);

    foreach (string link in GetUrlLinkFromHtmlCode(htmlCode))
    {
        SaveHtmlCode(WebConfigInfo.UrlPrefix + StripHrefSyntax(link));
    }
}

/// <summary>
/// Reduces a matched <c>href="..."</c> attribute to the bare link it contains.
/// </summary>
/// <param name="hrefAttribute">Attribute text, including the <c>href=</c> part and the quotes.</param>
/// <returns>The link with the attribute name and quotes removed and backslashes turned into '/'.</returns>
private static string StripHrefSyntax(string hrefAttribute)
{
    // Links are matched case-insensitively, so "HREF=" must be stripped as well.
    string link = Regex.Replace(hrefAttribute, @"href\s*=\s*", string.Empty, RegexOptions.IgnoreCase);

    // The link pattern accepts either quote style; remove both.
    return link.Replace("\"", string.Empty)
               .Replace("'", string.Empty)
               .Replace("\\", "/");
}
/// <summary>
/// Downloads the HTML of a page over HTTP.
/// </summary>
/// <param name="urlString">URL of the page to fetch.</param>
/// <returns>The response body, decoded as UTF-8.</returns>
private string GetHtmlCodeFromUrl(string urlString)
{
    HttpWebRequest hwRequest = (HttpWebRequest)WebRequest.Create(urlString);

    // UserAgent supplies only the header's value, so it must not repeat the "User-Agent:" name;
    // the closing parenthesis of the product comment is also restored.
    hwRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
    hwRequest.Accept = "*/*";
    hwRequest.KeepAlive = true;
    hwRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

    // using guarantees the response, stream and reader are released even when reading throws.
    using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
    using (Stream streamResponse = hwResponse.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(streamResponse, System.Text.Encoding.UTF8))
    {
        return readerOfStream.ReadToEnd();
    }
}
/// <summary>
/// Matches a quoted <c>href</c> attribute whose value contains none of the characters
/// <c>" ' . # :</c>, so values containing a dot, a fragment marker or a scheme separator are ignored.
/// </summary>
private static readonly Regex HrefLinkRegex = new Regex(
    @"href\s*=\s*(?:[""'](?<1>[^""'.#:]*)[""'])",
    RegexOptions.IgnoreCase);

/// <summary>
/// Finds the hyperlinks in a piece of HTML using a regular expression.
/// </summary>
/// <param name="htmlCode">HTML to search for links.</param>
/// <returns>
/// The distinct matched attributes in document order. Each item is the whole attribute text
/// (<c>href=</c> and the surrounding quotes included) with HTML-encoded ampersands decoded.
/// The result is empty when <paramref name="htmlCode"/> is null or empty.
/// </returns>
private static IEnumerable<string> GetUrlLinkFromHtmlCode(string htmlCode)
{
    if (string.IsNullOrEmpty(htmlCode))
    {
        return Enumerable.Empty<string>();
    }

    // Inside HTML attributes '&' is written as the entity "&amp;"; decode it so that
    // query strings such as "?a=1&amp;b=2" become usable URLs.
    return HrefLinkRegex.Matches(htmlCode)
        .Cast<Match>()
        .Select(match => match.Value.Replace("&amp;", "&"))
        .Distinct()
        .ToList();
}
- }
另外，给 string 类型扩展了一个 ToPhysicalPath 方法，用于把页面 URL 转换成对应静态文件的物理保存路径：
/// <summary>
/// Maps a page URL to the file under the application's <c>Html</c> folder where its static copy is stored.
/// </summary>
/// <remarks>
/// The URL path becomes the directory. The file name is the query string with its
/// '?', '&amp;' and '=' separators removed, followed by ".html"; a URL without a query
/// maps to "Index.html". For example <c>http://host/Article/Detail?id=5</c> maps to
/// <c>{application root}\Html\Article\Detail\id5.html</c>.
/// </remarks>
/// <param name="urlString">Absolute URL of the page.</param>
/// <returns>The physical path of the static HTML file.</returns>
/// <exception cref="System.InvalidOperationException">There is no current HTTP context.</exception>
public static string ToPhysicalPath(this string urlString)
{
    System.Uri uri = new System.Uri(urlString);

    System.Web.HttpContext context = System.Web.HttpContext.Current;
    if (context == null)
    {
        throw new System.InvalidOperationException(
            "ToPhysicalPath requires an active HTTP request (HttpContext.Current is null).");
    }

    string[] querys = uri.Query.Split(new char[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);

    string relativePath = string.Format(@"\Html\{0}\", uri.AbsolutePath)
        + string.Join(string.Empty, querys)
        + (querys.Length == 0 ? "Index.html" : ".html");

    relativePath = relativePath.Replace("/", @"\");

    // One Replace pass leaves "\\" behind for a run of three separators (e.g. the root URL "/"),
    // so keep collapsing until none remain.
    while (relativePath.Contains(@"\\"))
    {
        relativePath = relativePath.Replace(@"\\", @"\");
    }

    // Only the relative part is collapsed, so a UNC application root ("\\server\share\...")
    // keeps its leading double backslash.
    return context.Request.PhysicalApplicationPath.TrimEnd('\\') + relativePath;
}