zoukankan      html  css  js  c++  java
  • c# txt内存映射技术总结

    对于大文件,用 ReadLine 逐行读取的方式慢得像蜗牛爬,于是改用内存映射文件(MemoryMappedFile)技术。

    参考微软的这个使用方法说明

    https://msdn.microsoft.com/zh-cn/library/dd997372(v=vs.110).aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-1

    1: 主要用到下面两个方法,一个是打开一个文本,一个是对文本进行操作

    using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(srcFile, FileMode.Open, "xyz", size))
    
    using (mmf_reader = mmf.CreateViewAccessor(0, portion1, MemoryMappedFileAccess.Read))
    可以按照位置(偏移量)从文本中提取相应内容;如果文档内容全是英文、数字或键盘符号(单字节字符),提取的内容可以直接按 byte 处理,这样能节省很多资源。

    2:按照块提取会存在一个问题,你的分块肯定会破坏文档里边的整个行,所以我的方法是按照分块,确定块的附近换行符的位置.

    3:根据换行符 确定了位置,把一个大文件分成块,当然也可以一次读入到内存,做进一步处理,你可以参考微软的帮助做相应的优化.

    4: 打开一个新的保存文件,不破坏源文件,不在源文件上操作, 

    5:启动task[]线程组,每个块分配一个task去做相关处理.

    6:for循环涉及到每块处理的先后顺序

      // Start one task per entry of mappedFiles, passing the block index i as the task state.
      // NOTE(review): the array is sized by t but the loop is bounded by mappedFiles.Count;
      // presumably t == mappedFiles.Count — confirm, otherwise tasks[i] can go out of range.
      Task[] tasks = new Task[t];
                for (int i = 0; i < mappedFiles.Count; i++)
                {
                    tasks[i] = Task.Factory.StartNew(action, i);
                    // Wait() blocks here until task i has finished, so the next task is only
                    // started afterwards: the blocks are processed one at a time, in index order.
                    tasks[i].Wait();
                }

    这里可以使用 Action(无返回值)或 Func(带参数、有返回值)两种委托,我选择了前者。

      Action<object> action = (object obj) =>
     {
    }

    7:还可以使用 StreamReader 对打开的文本进行操作

     using (FileStream fs = new FileStream(TXT_FILE_PATH, FileMode.Open, FileAccess.ReadWrite))
                {
                    long targetRowNum = ttargetRowNum + 1;//目标行
                    long curRowNum = 1;//当前行
                    FILE_SIZE = fs.Length;
                    using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(fs, "test", fs.Length, MemoryMappedFileAccess.ReadWrite, null, HandleInheritability.None, false))
                    {
                        long offset = 0;
                        //int limit = 250;
                        int limit = 200;
                        try
                        {
                            StringBuilder sbDefineRowLine = new StringBuilder();
                            do
                            {
                                long remaining = fs.Length - offset;

      using (MemoryMappedViewStream mmStream = mmf.CreateViewStream(offset, remaining > limit ? limit : remaining))
    //using (MemoryMappedViewStream mmStream = mmf.CreateViewStream(offset, remaining))
                                {
                                    offset += limit;
                                    using (StreamReader sr = new StreamReader(mmStream))
                                    {
                                        //string ss = sr.ReadToEnd().ToString().Replace("
    ", "囧").Replace(Environment.NewLine, "囧");
                                        string ss = sr.ReadToEnd().ToString().Replace("
    ", SPLIT_VARCHAR).Replace(Environment.NewLine, SPLIT_VARCHAR);

    也可以直接把每一块读取到 byte[] buffer 中,我用的是这种方式:

    /// <summary>
    /// Copies <paramref name="srcFile"/> to a fixed destination path through memory-mapped files.
    /// The source is first read into memory in chunks of <paramref name="portionSize"/> megabytes,
    /// then every chunk is written to the destination at the offset it was read from.
    /// The elapsed time in milliseconds is printed to the console.
    /// </summary>
    /// <param name="srcFile">Path of the file to read.</param>
    /// <param name="portionSize">Chunk size in megabytes; must be between 1 and 2047.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="portionSize"/> is not positive, or is too large for one chunk to fit in a single byte array.
    /// </exception>
    private static void SpiltFile(string srcFile, int portionSize)
    {
        if (portionSize <= 0)
        {
            throw new ArgumentOutOfRangeException("portionSize", portionSize, "Portion size must be a positive number of megabytes.");
        }

        // Widen before multiplying: the original int arithmetic overflowed for 2048 MB and above.
        long portionBytes = portionSize * 1024L * 1024L;
        if (portionBytes > int.MaxValue)
        {
            // ReadArray/WriteArray take an int element count, so one chunk cannot exceed int.MaxValue bytes.
            throw new ArgumentOutOfRangeException("portionSize", portionSize, "Portion size is too large to be read into a single buffer.");
        }

        // NOTE(review): this literal looks like a UNC path whose backslashes were lost; it is kept
        // byte-for-byte here — confirm the intended destination.
        string savedPath = @"\stcsrv-c81MMFeedHealthyDatacache2016_07_10FeedkeysNo_Process_test.txt";

        long size = new FileInfo(srcFile).Length;           // total size in bytes
        long portion = (size + portionBytes - 1) / portionBytes; // number of chunks, rounded up

        object locker = new object();
        List<MappedFile> mappedFiles = new List<MappedFile>();
        Stopwatch watch = Stopwatch.StartNew();

        if (size == 0)
        {
            // A zero-length file cannot be mapped (CreateFromFile rejects an empty file with
            // capacity 0), so just produce an empty destination file.
            using (File.Create(savedPath))
            {
            }

            watch.Stop();
            Console.WriteLine(watch.ElapsedMilliseconds);
            return;
        }

        // Phase 1: read every chunk of the source into memory.
        // The map is unnamed (null) so that concurrent runs cannot collide on a system-wide name,
        // and it is opened read-only because the source is never modified.
        using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(srcFile, FileMode.Open, null, size, MemoryMappedFileAccess.Read))
        {
            Parallel.For(0L, portion, i =>
            {
                // Per-iteration locals: nothing is shared between iterations except the result list.
                long chunkOffset = i * portionBytes;
                long chunkSize = Math.Min(portionBytes, size - chunkOffset);

                // NOTE(review): as in the original, the lock covers the whole iteration, so the
                // chunks are effectively read one at a time. It also protects the List<T>, which
                // is not safe for concurrent Add. Instance members of MemoryMappedFile are
                // presumably not thread-safe — confirm before narrowing the lock.
                lock (locker)
                {
                    byte[] buffer = new byte[chunkSize];
                    using (MemoryMappedViewAccessor reader = mmf.CreateViewAccessor(chunkOffset, chunkSize, MemoryMappedFileAccess.Read))
                    {
                        reader.ReadArray(0, buffer, 0, buffer.Length);
                    }

                    mappedFiles.Add(new MappedFile
                    {
                        Offset = chunkOffset,
                        Buffer = buffer,
                        FileSize = chunkSize
                    });
                }
            });
        }

        // Phase 2: write every chunk to the destination at its original offset. The chunks may be
        // in any order in the list; each one carries its own offset.
        using (MemoryMappedFile writeMap = MemoryMappedFile.CreateFromFile(savedPath, FileMode.Create, null, size, MemoryMappedFileAccess.ReadWrite))
        using (MemoryMappedViewAccessor writeView = writeMap.CreateViewAccessor())
        {
            Parallel.For(0, mappedFiles.Count, i =>
            {
                MappedFile chunk = mappedFiles[i];

                // A single accessor is shared by all iterations, so writes are serialized.
                lock (locker)
                {
                    writeView.WriteArray(chunk.Offset, chunk.Buffer, 0, chunk.Buffer.Length);
                }
            });
        }

        watch.Stop();
        Console.WriteLine(watch.ElapsedMilliseconds);
    }
    [csharp] view plain copy
    /// <summary>
    /// One chunk of a file held in memory: where it starts in the file, its bytes, and its length.
    /// </summary>
    public class MappedFile
    {
        /// <summary>Byte offset of this chunk within the file.</summary>
        public long Offset { get; set; }

        /// <summary>The bytes of this chunk.</summary>
        public byte[] Buffer { get; set; }

        /// <summary>Length of this chunk in bytes.</summary>
        public long FileSize { get; set; }
    }

    C# 中字符串string和字节数组byte[]的转换

    string转byte[]:

    byte[] byteArray = System.Text.Encoding.Default.GetBytes ( str );

    byte[]转string:

    string str = System.Text.Encoding.Default.GetString ( byteArray );

    string转ASCII byte[]:

    byte[] byteArray = System.Text.Encoding.ASCII.GetBytes ( str );

    ASCII byte[]转string:

    string str = System.Text.Encoding.ASCII.GetString ( byteArray );
  • 相关阅读:
    测试面试03
    测试面试02
    测试面试01
    测试10
    测试09
    Python 知识要点:变量 可变和不可变
    Python 知识要点:变量及引用
    Python 知识要点:名片管理系统 2.0
    Python 知识要点:多值参数
    Python 知识要点:四种数值交换方法
  • 原文地址:https://www.cnblogs.com/zuochanzi/p/7356722.html
Copyright © 2011-2022 走看看