zoukankan      html  css  js  c++  java
  • 去掉txt中的重复标题

    想把小说转为mobi格式,找了个软件Txt2eBook.exe,把小说拖进去后,发现标题是重复的,格式如下:

    正文 第一章 某某

    第一章某某

    正文。。。。

    为了去掉多余的标题,思路是逐行读取文本:遇到以空格分隔的标题行(如“正文 第一章 某某”)时,检查紧随其后的一行是否与它重复,若重复则跳过;其余内容全部按原格式写入新的文本文件。

    代码如下:

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    using System.IO;
    
    namespace readText
    {
        /// <summary>
        /// Form that copies a GB2312 text file to a new file while dropping
        /// duplicated chapter titles. A line such as "正文 第一章 某某" is kept;
        /// if the line immediately after it equals the last two space-separated
        /// parts of that line joined together ("第一章某某"), that following
        /// line is skipped.
        /// </summary>
        public partial class Form1 : Form
        {
            // Input/output locations, shared by the line counter and the converter.
            private const string InputPath = @"D:\test.txt";
            private const string OutputPath = "D:\\test1.txt";

            // How many lines are processed between two progress label repaints.
            private const int ProgressInterval = 500;

            // Total number of lines in the input file; used only for the progress label.
            long coun;

            public Form1()
            {
                coun = count();
                InitializeComponent();
            }

            /// <summary>
            /// Counts the lines of the input file so progress can be displayed.
            /// </summary>
            /// <returns>
            /// The number of lines in the input file, or 0 when the file does not
            /// exist (so that the form can still be opened).
            /// </returns>
            private long count()
            {
                if (!File.Exists(InputPath))
                {
                    return 0;
                }

                // Count into a local so repeated calls never accumulate into the field.
                long total = 0;
                using (StreamReader sr = new StreamReader(InputPath, Encoding.GetEncoding("gb2312")))
                {
                    while (sr.ReadLine() != null)
                    {
                        total++;
                    }
                }
                return total;
            }

            /// <summary>
            /// Shows "processed/total" on the label and forces a repaint, because the
            /// conversion loop runs on the UI thread and would otherwise never redraw.
            /// </summary>
            /// <param name="processed">Number of input lines read so far.</param>
            private void ShowProgress(long processed)
            {
                this.label1.Text = processed.ToString() + "/" + coun.ToString();
                this.label1.Refresh();
            }

            /// <summary>
            /// Reads the input file line by line and writes every line except the
            /// duplicated titles to the output file. Any failure is reported in a
            /// message box.
            /// </summary>
            private void button1_Click(object sender, EventArgs e)
            {
                long lin = 0;
                try
                {
                    // Both streams are opened inside the try and wrapped in using, so
                    // open failures are reported and the files are always closed.
                    using (StreamReader sr = new StreamReader(InputPath, Encoding.GetEncoding("gb2312")))
                    using (StreamWriter sw = new StreamWriter(OutputPath))
                    {
                        // Text the next line must equal (after trimming) to count as a
                        // duplicate of the title just written; null when the previous
                        // line was not a title candidate.
                        string expectedDuplicate = null;

                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            lin++;
                            if (lin % ProgressInterval == 0)
                            {
                                ShowProgress(lin);
                            }

                            string trimmed = line.Trim();
                            bool isDuplicate = expectedDuplicate != null && trimmed == expectedDuplicate;
                            expectedDuplicate = null;
                            if (isDuplicate)
                            {
                                // Repeated title: drop it.
                                continue;
                            }

                            sw.WriteLine(line);

                            // A line containing spaces is a title candidate; its last two
                            // space-separated parts joined together are what a duplicated
                            // title on the next line would look like.
                            string[] parts = trimmed.Split(new char[] { ' ' });
                            if (parts.Length > 1)
                            {
                                expectedDuplicate = (parts[parts.Length - 2] + parts[parts.Length - 1]).Trim();
                            }
                        }
                    }

                    ShowProgress(lin);
                    MessageBox.Show("OK");
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.ToString());
                }
            }
        }
    }
    

      

  • 相关阅读:
    EOS之session的数据获取
    c# 数据库操作之ACCESS
    基础之创建与导出
    dotNET5的MVC页面传值方式总结
    dotNET开发之MVC中Controller返回值类型ActionResult方法总结
    C# 计算农历日期方法(2021版)
    普通邮箱设置客户端授权码并开启smtp服务以及关于QQ邮箱“命令顺序不正确。 服务器响应为:Error: need EHLO and AUTH first !”问题全指导
    13 张图,深入理解 Synchronized
    Springboot 注解大全
    python中的print()函数的学习-1
  • 原文地址:https://www.cnblogs.com/BeyondTime/p/3107862.html
Copyright © 2011-2022 走看看