zoukankan      html  css  js  c++  java
  • LINQ To Objects

    记录 LINQ to Objects 的学习

    LINQ to Objects 是指可将 LINQ 查询直接用于实现了 IEnumerable 或 IEnumerable<T> 的集合类型,包括框架本身定义的 List、Array、Dictionary,也可以是实现了上述枚举接口的自定义集合类型。LINQ 查询同样可以应用在字符串集合上,这在处理文本文件中的半结构化数据时非常有用。

    统计某个词在字符串中出现的次数。(ToLowerInvariant 在转换为小写时使用固定区域性的大小写规则。)

      // Sample text in which occurrences of the search term are counted.
      string text = @"Historically, the world of data and the world of objects" +
      @" have not been well integrated. Programmers work in C# or Visual Basic" +
      @" and also in SQL or XQuery. On the one side are concepts such as classes," +
      @" objects, fields, inheritance, and .NET Framework APIs. On the other side" +
      @" are tables, columns, rows, nodes, and separate languages for dealing with" +
      @" them. Data types often require translation between the two worlds; there are" +
      @" different standard functions. Because the object world has no notion of query, a" +
      @" query can only be represented as a string without compile-time type checking or" +
      @" IntelliSense support in the IDE. Transferring data from SQL tables or XML trees to" +
      @" objects in memory is often tedious and error-prone.";

      string searchTerm = "data";

      // Split the text into words on punctuation and spaces, dropping the empty
      // entries produced by adjacent separators (for example ", ").
      string[] source = text.Split(
          new char[] { '.', '?', '!', ' ', ';', ':', ',' },
          StringSplitOptions.RemoveEmptyEntries);

      // Build the (deferred) query. A case-insensitive ordinal comparison matches
      // both "data" and "Data" without allocating lower-cased copies of every word.
      var matchQuery = from word in source
                       where string.Equals(word, searchTerm, StringComparison.OrdinalIgnoreCase)
                       select word;

      // Count() enumerates the query, which is what actually executes it.
      int wordCount = matchQuery.Count();

      // The inner quotes must be escaped, otherwise the string literal ends early
      // and the statement does not compile.
      Console.WriteLine("{0} occurrences(s) of the search term \"{1}\" were found.", wordCount, searchTerm);
    View Code

    查询一组包含指定单词的句子。

       // Paragraph that is searched sentence by sentence.
       string text = @"Historically, the world of data and the world of objects" +
       @" have not been well integrated. Programmers work in C# or Visual Basic" +
       @" and also in SQL or XQuery. On the one side are concepts such as classes," +
       @" objects, fields, inheritance, and .NET Framework APIs. On the other side" +
       @" are tables, columns, rows, nodes, and separate languages for dealing with" +
       @" them. Data types often require translation between the two worlds; there are" +
       @" different standard functions. Because the object world has no notion of query, a" +
       @" query can only be represented as a string without compile-time type checking or" +
       @" IntelliSense support in the IDE. Transferring data from SQL tables or XML trees to" +
       @" objects in memory is often tedious and error-prone.";

       // Words that must ALL be present for a sentence to be selected.
       string[] wordsToMatch = { "Historically", "data", "integrated" };

       // Characters that separate the words inside one sentence.
       char[] wordDelimiters = { ' ', ';', ':', ',' };

       // Sentence terminators: the paragraph is cut at each of these.
       char[] sentenceTerminators = { '.', '?', '!' };
       string[] sentences = text.Split(sentenceTerminators);

       // A sentence qualifies when the distinct words it shares with wordsToMatch
       // are as many as wordsToMatch itself, i.e. every wanted word occurs in it.
       var sentenceQuery = sentences.Where(sentence =>
       {
           string[] words = sentence.Split(wordDelimiters, StringSplitOptions.RemoveEmptyEntries);
           int sharedWords = words.Intersect(wordsToMatch).Count();
           return sharedWords == wordsToMatch.Count();
       });

       // Enumerating the query executes it; print each matching sentence.
       foreach (string matchingSentence in sentenceQuery)
       {
           Console.WriteLine(matchingSentence);
       }
    View Code

    查询字符串中的字符(String实现了IEnumerable<char>, IEnumerable)

    // A string is a sequence of characters, so it can be queried directly.
    string aString = "ABCDE99F-J74-12-89A";

    // Deferred query that keeps only the decimal-digit characters.
    IEnumerable<char> stringQuery = aString.Where(ch => char.IsDigit(ch));

    // First enumeration: print every digit followed by a space.
    foreach (char digit in stringQuery)
    {
        Console.Write(digit + " ");
    }

    // Second enumeration: Count() runs the same query again.
    int count = stringQuery.Count();
    Console.WriteLine("Count = {0}", count);
    View Code

    LINQ查询与正则表达式合并

    namespace ConsoleApp4
    {
        using System;
        using System.Collections.Generic;
        using System.Linq;
        using System.Text.RegularExpressions;

        /// <summary>
        /// Combines a LINQ query with a regular expression: finds every .htm file
        /// under a folder whose text mentions one of several Visual Studio products.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Builds the file query, runs it and prints each matching file together
            /// with the matched values.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                // Folder to search. Directory separators must be present in the
                // verbatim string; note the trailing backslash.
                string startFolder = @"C:\Program Files (x86)\Microsoft Visual Studio 14.0\";

                IEnumerable<System.IO.FileInfo> fileList = GetFiles(startFolder);

                // '+' is a regex quantifier, so the literal "C++" has to be escaped;
                // an unescaped "C++" is rejected as a nested quantifier.
                Regex searchTerm = new Regex(@"Visual (Basic|C#|C\+\+|Studio)");

                var queryMatchingFiles = from file in fileList
                                         where file.Extension == ".htm"
                                         let fileText = System.IO.File.ReadAllText(file.FullName)
                                         let matches = searchTerm.Matches(fileText) // regex match against the file content
                                         where matches.Count > 0
                                         select new
                                         {
                                             path = file.FullName,
                                             // Cast<Match>() makes the non-generic MatchCollection
                                             // queryable on every framework version.
                                             matcheValues = matches.Cast<Match>().Select(x => x.Value)
                                         };

                // The query is deferred: nothing is read until it is enumerated here.
                foreach (var result in queryMatchingFiles)
                {
                    Console.WriteLine(result.path);
                    foreach (string value in result.matcheValues)
                    {
                        Console.WriteLine("  " + value);
                    }
                }
            }

            /// <summary>
            /// Returns a <see cref="System.IO.FileInfo"/> for every file under
            /// <paramref name="path"/>, including all subdirectories.
            /// </summary>
            /// <param name="path">Root directory to scan.</param>
            /// <returns>A materialized list of the files found.</returns>
            /// <exception cref="System.IO.DirectoryNotFoundException">
            /// <paramref name="path"/> does not exist.
            /// </exception>
            static IEnumerable<System.IO.FileInfo> GetFiles(string path)
            {
                if (!System.IO.Directory.Exists(path))
                {
                    throw new System.IO.DirectoryNotFoundException("Directory not found: " + path);
                }

                return System.IO.Directory
                    .GetFiles(path, "*.*", System.IO.SearchOption.AllDirectories)
                    .Select(name => new System.IO.FileInfo(name))
                    .ToList();
            }
        }
    }
    View Code

    列表求差值

    // Two name lists; the second one contains everything in the first plus two more.
    string[] names1 = { "aa", "bb", "cc" };
    string[] names2 = { "aa", "bb", "cc", "dd", "ee" };

    // Set difference: the elements of names2 that do not occur in names1.
    // (The reverse, names1 minus names2, would be an empty sequence.)
    IEnumerable<string> differenceQuery = Enumerable.Except(names2, names1);

    foreach (string remaining in differenceQuery)
    {
        Console.WriteLine(remaining);
    }

    // Output:
    // dd
    // ee
    View Code

    按任意字段对结构化的文本数据进行排序(结构化的文本,是指数据排列有一定的规律,例如dbf、csv)

    假设有一个文件 scores.csv,内容如下:

    名字,语文,数学,英语

    小敏,34,45,56

    小希,56,65,77

    小花,99,99,99

    string[] scores = System.IO.File.ReadAllLines(@"../../../scores.csv");

    Console.WriteLine("语文成绩从高到底排序:");

    // Ignore blank lines so that they can neither be mistaken for the header
    // nor break the numeric parse below.
    var csvLines = scores.Where(line => !string.IsNullOrWhiteSpace(line)).ToList();

    // The first non-blank line is the column header: print it as-is instead of
    // sorting it together with the data rows.
    if (csvLines.Count > 0)
    {
        Console.WriteLine(csvLines[0]);
    }

    // Field index 1 holds the Chinese score (index 0 is the name). Parse it as a
    // number and sort descending, so that e.g. 100 ranks above 99; a plain string
    // sort would be ascending and lexicographic.
    var scoreQuery = from score in csvLines.Skip(1)
                     let fields = score.Split(',')
                     orderby int.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture) descending
                     select score;

    foreach (string str in scoreQuery)
    {
        Console.WriteLine(str);
    }
    View Code

     合并和比较字符串集合(Union是数学意义上的合并,Concat是简单的联结)

    // Load both name lists, one entry per line.
    string[] fileA = System.IO.File.ReadAllLines(@"../../../names1.txt");
    string[] fileB = System.IO.File.ReadAllLines(@"../../../names2.txt");

    // Concat simply appends fileB to fileA, so names present in both files
    // appear twice in the sorted result.
    IEnumerable<string> concatQuery =
        from name in fileA.Concat(fileB)
        orderby name
        select name;

    // Union is the set union: it uses the default equality comparer and yields
    // each distinct name only once.
    IEnumerable<string> uniqueNamesQuery =
        from name in fileA.Union(fileB)
        orderby name
        select name;
    View Code

    多个数据源填充一个集合

    假设names.csv内容如下:

    firstname,lastname,id

    Omelchenko,Svetlana,111

    mingming,chen,112

    假设scores.csv内容如下:

     id,语,数,英

    111,98,23,67

    112,34,90,99

    namespace ConsoleApp4
    {
        using System;
        using System.Collections.Generic;
        using System.Linq;

        /// <summary>
        /// Fills a collection of <see cref="Student"/> objects from two CSV files
        /// that share a student ID column.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Reads names.csv and scores.csv, joins their rows on the student ID
            /// and prints the resulting students.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                string[] names = System.IO.File.ReadAllLines(@"../../../names.csv");
                string[] scores = System.IO.File.ReadAllLines(@"../../../scores.csv");

                // Score rows: skip the header line, and drop empty fields so that a
                // trailing comma or a blank line cannot reach Convert.ToInt32.
                var scoreRows = from scoreLine in scores.Skip(1)
                                let fields = scoreLine.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                                where fields.Length > 0
                                select fields;

                // Name rows: skip the header line and any row without an ID column,
                // then join on the ID instead of comparing every pair of rows.
                IEnumerable<Student> queryNamesScores =
                    from nameLine in names.Skip(1)
                    let splitName = nameLine.Split(',')
                    where splitName.Length >= 3
                    join splitScore in scoreRows
                        on Convert.ToInt32(splitName[2]) equals Convert.ToInt32(splitScore[0])
                    select new Student()
                    {
                        FirstName = splitName[0],
                        LastName = splitName[1],
                        ID = Convert.ToInt32(splitName[2]),
                        ExamScores = splitScore.Skip(1).Select(x => Convert.ToInt32(x)).ToList()
                    };

                // The query is deferred; enumerating it here performs the join.
                foreach (Student student in queryNamesScores)
                {
                    Console.WriteLine("{0} {1} (ID {2}): {3}",
                        student.FirstName,
                        student.LastName,
                        student.ID,
                        string.Join(", ", student.ExamScores));
                }
            }
        }

        /// <summary>A student's name, ID and exam scores, merged from the two files.</summary>
        class Student
        {
            /// <summary>First column of names.csv.</summary>
            public string FirstName { get; set; }

            /// <summary>Second column of names.csv.</summary>
            public string LastName { get; set; }

            /// <summary>Student ID shared by names.csv and scores.csv.</summary>
            public int ID { get; set; }

            /// <summary>All score columns of the matching scores.csv row, in file order.</summary>
            public List<int> ExamScores { get; set; }
        }
    }
    View Code

     使用“分组”与“合并”将一个文件拆成多个文件

    假设:names1.txt

    Bankov, Peter  
    Holm, Michael  
    Garcia, Hugo  
    Potra, Cristina  
    Noriega, Fabricio  
    Aw, Kam Foo  
    Beebe, Ann  
    Toyoshima, Tim  
    Guy, Wey Yuan  
    Garcia, Debra
    View Code

    names2.txt

    Liu, Jinghao  
    Bankov, Peter  
    Holm, Michael  
    Garcia, Hugo  
    Beebe, Ann  
    Gilchrist, Beth  
    Myrcha, Jacek  
    Giakoumakis, Leo  
    McLin, Nkenge  
    El Yassir, Mehdi
    View Code
    string[] fileA = System.IO.File.ReadAllLines(@"../../../names1.txt");
    string[] fileB = System.IO.File.ReadAllLines(@"../../../names2.txt");

    // Merge BOTH files, keeping each distinct name once. Blank lines are removed
    // because the grouping key below is the first character of the name.
    var mergeQuery = fileA.Union(fileB)
                          .Where(name => !string.IsNullOrWhiteSpace(name));

    // Group the merged names by their first character, ordered by that character.
    var groupQuery = from name in mergeQuery
                     group name by name[0] into g
                     orderby g.Key
                     select g;

    // Write one output file per initial and echo the same content to the console.
    foreach (var g in groupQuery)
    {
        string fileName = @"../../../testFile_" + g.Key + ".txt";

        Console.WriteLine(g.Key);

        // The using statement flushes and closes the writer even if a write fails.
        using (System.IO.StreamWriter sw = new System.IO.StreamWriter(fileName))
        {
            foreach (var item in g)
            {
                sw.WriteLine(item);
                Console.WriteLine("   {0}", item);
            }
        }
    }
    View Code

    结果:

    CSV文本文件计算多列的值

    创建 scores.txt(CSV 格式的文本文件),内容如下:

    111, 97, 92, 81, 60  
    112, 75, 84, 91, 39  
    113, 88, 94, 65, 91  
    114, 97, 89, 85, 82  
    115, 35, 72, 91, 70  
    116, 99, 86, 90, 94  
    117, 93, 92, 80, 87  
    118, 92, 90, 83, 78  
    119, 68, 79, 88, 92  
    120, 99, 82, 81, 79  
    121, 96, 85, 91, 60  
    122, 94, 92, 91, 91
    View Code
    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Linq;
    
    namespace ConsoleApp4
    {
        /// <summary>
        /// Computes per-column statistics (average, highest, lowest) over the score
        /// columns of a comma-separated text file.
        /// </summary>
        class Program
        {
            /// <summary>Reads the score file and prints the statistics of every exam column.</summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                string[] lines = System.IO.File.ReadAllLines(@"../../../scores.txt");
                MultiColumns(lines);
            }

            /// <summary>
            /// Prints the average, highest and lowest score of every exam column.
            /// </summary>
            /// <param name="lines">
            /// CSV rows whose first field is the student ID and whose remaining
            /// fields are integer scores. Blank rows are ignored.
            /// </param>
            private static void MultiColumns(string[] lines)
            {
                Console.WriteLine("Multi Column Query:");

                // Parse every row exactly once. Materializing the result means the
                // per-column loop below does not re-split and re-convert the whole
                // file on each pass, as a deferred query would.
                List<int[]> rows =
                    (from line in lines
                     where !string.IsNullOrWhiteSpace(line)
                     let elements = line.Split(',')
                     let scores = elements.Skip(1) // drop the leading ID field
                     select (from str in scores
                             select Convert.ToInt32(str)).ToArray())
                    .ToList();

                // Nothing to report for an empty file; First() would throw here.
                if (rows.Count == 0)
                {
                    return;
                }

                // The first row determines how many exam columns are reported.
                // NOTE(review): rows are assumed to have at least this many score
                // fields; a shorter row raises IndexOutOfRangeException.
                int columnCount = rows[0].Length;

                for (int column = 0; column < columnCount; column++)
                {
                    // Scores of the current exam across all students.
                    List<int> columnScores = rows.Select(row => row[column]).ToList();

                    double average = columnScores.Average();
                    int max = columnScores.Max();
                    int min = columnScores.Min();

                    Console.WriteLine("Exam #{0} Average: {1:##.##} High Score: {2} Low Score: {3}",
                                  column + 1, average, max, min);
                }
            }
        }
    }
    View Code
  • 相关阅读:
    Windows下好用的git客户端--GitExtentions
    高分辨率下放大netbeans中的小图标
    小书匠使用手册
    win8 telnet VirtualBox中的redhat9
    win8安装新字体
    netbeans设置字体
    win7下Chrome有两个图标的解决方法
    【转】HDU-6035-Colorful Tree
    POJ1703--Find them, Catch them(种类并查集)
    KMP的妙用(利用next数组寻找字符串的循环节)
  • 原文地址:https://www.cnblogs.com/bibi-feiniaoyuan/p/12390650.html
Copyright © 2011-2022 走看看