zoukankan      html  css  js  c++  java
  • C#读取HTML文件内容写入记事本 (转自_逍遥2010_百度空间)

    C#读取HTML文件内容写入记事本

    // Exports every .htm file in the directory typed into textBox1 to a single
    // text file ("告警经验库信息.txt") in that same directory. For each page it writes
    // the alarm cause (last word of the first <h4> heading, passed through
    // chinaString) and the page text with the <title>, the first <ul><li>…</ul>
    // block and all HTML tags removed. Any failure is reported in a message box.
    try
    {
        int totalFile = 0;
        string inputPath = this.textBox1.Text.Trim();
        if (inputPath.Length == 0)
        {
            MessageBox.Show("请输入HTML文件路径!");
        }
        else
        {
            // Make sure the directory path ends with a separator so file names
            // can simply be appended to it.
            string dirPath = inputPath;
            if (!dirPath.EndsWith("\\", StringComparison.Ordinal))
            {
                dirPath = dirPath + "\\";
            }

            DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
            FileInfo[] files = dirInfo.GetFiles();
            string filename = dirPath + "告警经验库信息.txt";

            // Loop-invariant helpers, built once instead of once per file.
            Encoding encode = Encoding.GetEncoding("gb2312");
            Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>");                            // any HTML tag
            Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None); // first <ul><li>…</ul> block
            string headingPattern = "<h4>([^<]*)</h4>";                                  // text between <h4> and </h4>
            string titlePattern = "<title>([^<]*)</title>";
            // Line breaks and tabs only. The pipe characters previously inside the
            // character class were literals and stripped '|' from the content too.
            string lineBreakPattern = "[\n\r\t]";

            // File.AppendText creates the file when it does not exist, so no
            // File.Exists branch is needed; `using` closes the writer even when
            // processing a file throws.
            using (StreamWriter sw = File.AppendText(filename))
            {
                foreach (FileInfo fileinfo in files)
                {
                    // Only .htm files are processed (case-insensitive, so ".HTM" counts).
                    if (!fileinfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    totalFile = totalFile + 1;

                    // Read the local file directly; no web request is required and
                    // no stream/reader is left undisposed.
                    string strhtml = File.ReadAllText(dirPath + fileinfo.Name, encode);

                    // Alarm cause: inner text of the first <h4>. Taking the capture
                    // group avoids the Substring exception when the page has no <h4>
                    // (the cause is then empty).
                    Match headingMatch = Regex.Match(strhtml, headingPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    string cause = Regex.Replace(headingMatch.Groups[1].Value, lineBreakPattern, " ").Trim();

                    // Whole <title>…</title> element, flattened the same way as the
                    // page below so it can be located and removed from it.
                    Match titleMatch = Regex.Match(strhtml, titlePattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    string titlename = Regex.Replace(titleMatch.Value, lineBreakPattern, "").Trim();

                    // Flatten the page to one line so the non-greedy list pattern can
                    // span what used to be several lines.
                    strhtml = Regex.Replace(strhtml, lineBreakPattern, "");

                    // Drop the first <ul><li>…</ul> block.
                    Match listMatch = listRegex.Match(strhtml);
                    if (listMatch.Success)
                    {
                        strhtml = strhtml.Replace(listMatch.Value, "");
                    }

                    // Drop the <title> element; string.Replace rejects an empty
                    // search string, so skip this when the page has no title.
                    if (titlename.Length > 0)
                    {
                        strhtml = strhtml.Replace(titlename, "");
                    }

                    strhtml = tagRegex.Replace(strhtml, " ");   // strip remaining HTML tags
                    strhtml = strhtml.Replace("&nbsp;", "");    // strip non-breaking-space entities

                    // The heading looks like "ALM-1 网元启动"; the last space-separated
                    // word is the cause text.
                    string[] arr = cause.Split(' ');
                    string zhCause = arr[arr.Length - 1];

                    sw.WriteLine("第" + totalFile + "个文件:" + fileinfo.Name);
                    sw.WriteLine("-----告警原因------:");
                    zhCause = this.chinaString(zhCause);
                    sw.WriteLine(zhCause);
                    sw.WriteLine("-----处理经验------:");
                    sw.WriteLine(strhtml);
                    sw.WriteLine();
                    sw.Flush();
                }
            }

            MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
    }
    catch (Exception ee)
    {
        MessageBox.Show("操作失败:" + ee.Message);
    }
  • 相关阅读:
    程序员需要的各种PDF格式电子书【附网盘免费下载资源地址】
    Web安全大揭秘
    tar 压缩解压命令详解
    django开发项目的部署nginx
    CentOS7安装mysql-python模块
    我的博客站点上线了
    2006
    centos7安装pip
    mysql删除匿名用户
    FilenameFilter 文件名过滤
  • 原文地址:https://www.cnblogs.com/zjw520/p/3014804.html
Copyright © 2011-2022 走看看