zoukankan      html  css  js  c++  java
  • C#读取HTML文件内容写入记事本

    C#读取HTML文件内容写入记事本


    // Batch-exports alarm knowledge from HTML files to a text file.
    // For every .htm file in the directory typed into textBox1, extracts the alarm cause
    // (last space-separated token of the first <h4>) and the remaining page text (tags,
    // the first <ul><li>...</ul> list and the <title> element removed), and appends both to
    // "告警经验库信息.txt" in that same directory. Any failure aborts the batch and is
    // reported to the user in a message box.
    try
    {
        string dirPath = this.textBox1.Text.Trim();
        if (dirPath.Length == 0)
        {
            MessageBox.Show("请输入HTML文件路径!");
        }
        else
        {
            // Normalise to a trailing backslash so file names can be appended directly.
            if (!dirPath.EndsWith("\\", StringComparison.Ordinal))
            {
                dirPath = dirPath + "\\";
            }

            DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
            FileInfo[] files = dirInfo.GetFiles();
            string filename = dirPath + "告警经验库信息.txt";

            // Loop-invariant helpers, created once instead of once per file.
            Encoding encode = Encoding.GetEncoding("gb2312");
            Regex tagRegex = new Regex(@"<[^>]+>"); // matches any HTML tag, opening or closing
            Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None); // content after <ul><li> up to </ul>
            // Line breaks and tabs only; a '|' inside the character class would also strip literal pipes.
            const string lineBreakPattern = "[\n\r\t]";
            int totalFile = 0;

            // File.AppendText creates the file when it does not exist yet; the using block
            // guarantees the writer is flushed and closed even if a file fails to parse.
            using (StreamWriter sw = File.AppendText(filename))
            {
                foreach (FileInfo fileinfo in files)
                {
                    // Process .htm files only; extension case is ignored (".HTM" counts too).
                    if (!fileinfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    totalFile = totalFile + 1;

                    // Read straight from disk; no web request (and no URI parsing of the path) is needed.
                    string strhtml = File.ReadAllText(fileinfo.FullName, encode);

                    // Alarm cause: text of the first <h4>, e.g. "ALM-1 网元启动" -> "网元启动".
                    // Using the capture group yields an empty string (not an exception) when no <h4> exists.
                    Match headingMatch = Regex.Match(strhtml, "<h4>([^<]*)</h4>", RegexOptions.IgnoreCase);
                    string cause = Regex.Replace(headingMatch.Groups[1].Value, lineBreakPattern, " ").Trim();
                    string[] causeParts = cause.Split(' ');
                    string zhCause = causeParts[causeParts.Length - 1]; // last element: the alarm cause

                    // Whole <title>...</title> element with line breaks removed, so it can be
                    // found again in the flattened source below.
                    Match titleMatch = Regex.Match(strhtml, "<title>([^<]*)</title>", RegexOptions.IgnoreCase);
                    string titlename = Regex.Replace(titleMatch.Value, lineBreakPattern, "");

                    // Flatten the source, then drop the first <ul><li>...</ul> block and the title.
                    strhtml = Regex.Replace(strhtml, lineBreakPattern, "");
                    Match listMatch = listRegex.Match(strhtml);
                    string dataStr = "<ul><li>" + listMatch.Groups["key"].Value + "</ul>";
                    strhtml = strhtml.Replace(dataStr, "");
                    if (titlename.Length > 0)
                    {
                        // string.Replace rejects an empty search string, so skip pages without a title.
                        strhtml = strhtml.Replace(titlename, "");
                    }
                    strhtml = tagRegex.Replace(strhtml, " "); // strip the remaining HTML tags
                    strhtml = strhtml.Replace("&nbsp;", ""); // drop non-breaking-space entities

                    sw.WriteLine("" + totalFile + "个文件:" + fileinfo.Name);
                    sw.WriteLine("-----告警原因------:");
                    zhCause = this.chinaString(zhCause); // NOTE(review): presumably keeps only the Chinese text; helper is defined elsewhere
                    sw.WriteLine(zhCause);
                    sw.WriteLine("-----处理经验------:");
                    sw.WriteLine(strhtml);
                    sw.WriteLine();
                    sw.Flush();
                }
            }

            MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
    }
    catch (Exception ee)
    {
        MessageBox.Show("操作失败:" + ee.Message);
    }

  • 相关阅读:
    javascript获取xml节点的最大值
    iis 不能浏览aspx页面
    批量替换文件夹里面的文本文件的指定字符
    select update delete
    SQL IAM的理解
    数据库的页构成
    sqltype IsDBNull
    MSSQL优化教程之1.4 其他几种类型的页面
    SqlDataAdapter
    行状态,行版本
  • 原文地址:https://www.cnblogs.com/xiaoyao2011/p/2195197.html
Copyright © 2011-2022 走看看