zoukankan      html  css  js  c++  java
  • C#快速随机按行读取大型文本文件

    下面是我实现的一个数据文件随机读取类,可以随机读取大型文本文件的某一行。在我的机器上,对一个130MB的文本文件读取第200000行,耗时从传统做法的400ms降低到了3ms。
    对文本文件进行读取时,一般采用ReadLine()逐行读取。在这种情况下,C#内的FileStream和BufferedStream类处理绰绰有余了。它不会将整个文件全部读入,而是有缓冲地读。但是,要想随机读取某一行,在行数据长度不统一的情况下,如果每次都这样遍历到指定行,其效率显然是很低下的。
    当然,代价也是有的,引入了第一次打开文件的打开时间,且占用了少部分内存(占用多少是可以设置的,当然占得越小速度也越慢,但最大值也比全部读入要小很多)。

    (对网络代码进行部分改写)

    using System;
    using System.Collections.Generic;
    using System.Text;
    using System.Collections;
    using System.Threading;
    using System.IO;

    namespace DataBuffer
    {
        public static class FileConfig
        {
            public static int STREAM_BUFFER_SIZE = 1024000;
            public static int MAP_DISTANCE = 10;
        }

        public class DataFile
        {
            ///
            /// 数据文件名
            ///
            public string fileName = "";
            ///
            /// 初始化读取完标志
            ///
            public bool done = false;

            ///
            /// 当前流位置
            ///
            public long Position = 0;

            ///
            /// 文件头部信息
            ///
            private Hashtable head = new Hashtable();
            public Hashtable Head { get { return head; } set { head = value; } }

            ///
            /// 文件地图
            ///       
            private ArrayList map = new ArrayList();
            public ArrayList Map { get { return map; } set { map = value; } }

            ///
            /// 文件数据行行数
            ///       
            private long lines = 0;
            public long Lines { get { return lines; } set { lines = value; } }
        }

        public class DataBuffer
        {
            private FileStream fs = null;
            private BufferedStream bs = null;
            private StreamReader sr = null;
            private StreamWriter sw = null;
            ///
            /// 文件信息数据结构
            ///
            public DataFile dataFile = new DataFile();

            public DataBuffer(string name)
            {
                dataFile.fileName = name;
            }

            ///
            /// 打开文件
            ///
            public bool Open()
            {
                try
                {
                    //初始化各流
                    fs = new FileStream(dataFile.fileName, FileMode.Open, FileAccess.ReadWrite);
                    bs = new BufferedStream(fs, FileConfig.STREAM_BUFFER_SIZE);
                    sr = new StreamReader(fs);
                    sw = new StreamWriter(fs);
                    Thread initFile = new Thread(new ThreadStart(InitDataFile));
                    initFile.Start();
                    return true;
                }
                catch (Exception ee)
                {
                    ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件打开");
                    return false;
                }
            }

            private void InitDataFile()
            {
                //另开一个读取流
                BufferedStream bs = new BufferedStream(fs);
                StreamReader sr = new StreamReader(bs);

                //读入数据文件头信息。共14行
                string thisLine = NextLine(ref sr);
                dataFile.Head.Add("Subject", thisLine.Substring(11));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Date", thisLine.Substring(8));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Time", thisLine.Substring(8));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Channels", thisLine.Substring(12));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Rate", thisLine.Substring(8));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Type", thisLine.Substring(8));

                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Rows", thisLine.Substring(8));

                thisLine = NextLine(ref sr);
                thisLine = NextLine(ref sr);
                dataFile.Head.Add("Electrode Labels", thisLine);
                thisLine = NextLine(ref sr);
                thisLine = NextLine(ref sr);
                thisLine = NextLine(ref sr);
                thisLine = NextLine(ref sr);
                thisLine = NextLine(ref sr);
                //降低自己的优先级
                Thread.CurrentThread.Priority = ThreadPriority.BelowNormal;

                //数行数,建立地图
                long lines = 1;
                //在地图中加入首条数据的位置信息
                dataFile.Map.Add(dataFile.Position);
                //顺序建立文件地图
                while (!sr.EndOfStream)
                {
                    thisLine = NextLine(ref sr);
                    if ((++lines) % FileConfig.MAP_DISTANCE == 0)
                    {
                        dataFile.Map.Add(dataFile.Position);
                    }
                }
                dataFile.Lines = lines;
                dataFile.done = true;
            }

            ///
            /// 文件关闭
            ///
            public bool Close()
            {
                try
                {
                    //顺序关闭各流
                    sw.Close();
                    sr.Close();
                    bs.Close();
                    fs.Close();
                    return true;
                }
                catch (Exception ee)
                {
                    ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件关闭");
                    return false;
                }
            }

            ///
            /// 顺序读取下一行。效率低不建议大规模使用,只在打开文件的时候使用一次
            ///
            ///
            public string NextLine(ref StreamReader sr)
            {
                string next = sr.ReadLine();
                //+2是指Windows换行回车。Linux下要改为+1
                dataFile.Position += next.Length + 2;
                return next;
            }

            //指定的目标行内容
            public string ReadLine(long line)
            {
                try
                {
                    //如果载入完毕
                    if (dataFile.done)
                    {
                        //确定数据块索引号
                        int index = (int)line / FileConfig.MAP_DISTANCE;
                        //移动到指定位置
                        bs.Seek(long.Parse(dataFile.Map[index].ToString()), SeekOrigin.Begin);
                        //创建流读取器
                        sr = new StreamReader(bs);
                        //移动到指定行
                        for (int i = 1; i <= (line - index * FileConfig.MAP_DISTANCE); i++)
                        {
                            sr.ReadLine();
                        }
                        //返回指定行的值
                        return sr.ReadLine();
                    }
                    else
                    {
                        return "";
                    }
                }
                catch (Exception ee)
                {
                    ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件读取");
                    return "";
                }
            }
        }

    }

  • 相关阅读:
    字符编码 进制转换
    Android工具HierarchyViewer 代码导读(1) 功能实现演示
    jQuery中的bind(), live(), on(), delegate()
    [转]ActionScript3.0中XML处理方法
    Pane和Panel的区别
    [转]在命令行中编译运行Java Applet
    [转]关于五险一金,你知道多少?
    [转]ActionScript3.0对象深复制
    [转]用Flashbug调试Flash
    [转]用EditPlus搭建简易的Java开发环境
  • 原文地址:https://www.cnblogs.com/amylis_chen/p/4001818.html
Copyright © 2011-2022 走看看