zoukankan      html  css  js  c++  java
  • Finder(文件内容搜索工具)

    搜索文件夹内容的小工具

    Github

    两种搜索模式的算法:

    BoyerMooreSearch.cs

    using System.Threading;
    using System.Collections.Generic;
    using System.Linq;
    
    namespace Finder.Algorithms
    {
        /// <summary>
        /// An implemention of Boyer-Moore algorithm.
        /// <para/>author : Ornithopter
        /// </summary>
        class BoyerMooreSearch : SearchBase
        {
            /// <summary>
            /// 
            /// </summary>
            /// <param name="source"></param>
            /// <param name="pattern"></param>
            /// <param name="token"></param>
            /// <returns>An array of matched index</returns>
            public int[] Search(string source, string pattern, CancellationToken token)
            {
                var matchIndexes = new List<int>();
    
                // step increasment.
                int delta;
    
                // prepare a map providing delta for each char in pattern string.
                var deltaMap = CreateDeltaMap(pattern);
    
                // start searching.
                for (var i = pattern.Length - 1; i < source.Length; i += delta)
                {
                    token.ThrowIfCancellationRequested();
                    // find next match and update delta.
                    if (FindNext(source, pattern, i, deltaMap, token, out delta))
                    {
                        // add to result list if found.
                        matchIndexes.Add(i - (pattern.Length - 1));
                    }
                }
                return matchIndexes.ToArray();
            }
    
            private static bool Match(string source, int[] deltaMap, string pattern, CancellationToken token)
            {
                // step increasment.
                int delta;
    
                // start searching.
                for (var i = pattern.Length - 1; i < source.Length; i += delta)
                {
                    token.ThrowIfCancellationRequested();
                    // find next match and update delta.
                    if (FindNext(source, pattern, i, deltaMap, token, out delta))
                    {
                        return true;
                    }
                }
                return false;
            }
    
            /// <summary>
            /// Find the next matched index and update delte at the same time.
            /// </summary>
            /// <param name="source"></param>
            /// <param name="pattern"></param>
            /// <param name="start"></param>
            /// <param name="deltaMap"></param>
            /// <param name="delta"></param>
            /// <returns>true if found one, otherwise false.</returns>
            private static bool FindNext(string source, string pattern, int start, int[] deltaMap, CancellationToken token, out int delta)
            {
                int i = pattern.Length - 1,
                    index = 0;
    
                // start comparing from the last char in pattern.
                while (source[start - index] == pattern[i - index])
                {
                    token.ThrowIfCancellationRequested();
                    if (index != pattern.Length - 1)
                    {
                        index++;
                    }
                    else
                    {
                        // matchs to the end. So it's a search result.
                        delta = pattern.Length;
                        return true;
                    }
                }
    
                // found one dismatched char at (start - index), get delta from map.
                var c = source[start - index];
                delta = /*c > 128 ? 0 : */deltaMap[c];
    
                if (delta == 0)
                {
                    // this means the source[start] char is the last char in pattern
                    // and only appears once. So delta should be the length of pattern.
                    delta = pattern.Length;
                }
                return false;
            }
    
            static int[] CreateDeltaMap(string pattern)
            {
                const int alphabetSize = 0xffff;
                var patternLength = pattern.Length;
                var deltaMap = new int[alphabetSize];
    
                // initialize the map.
                for (var i = 0; i < alphabetSize; i++)
                {
                    deltaMap[i] = patternLength;
                }
    
                // start from 0, which means any duplicated char will only have
                // the index nearest to the end.
                for (var i = 0; i < patternLength; i++)
                {
                    var index = pattern[i];
                    deltaMap[index] = patternLength - i - 1;
                }
                return deltaMap;
            }
    
            protected override void Build(CancellationToken token)
            {
                //throw new NotImplementedException();
            }
    
            public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
            {
                var fileList = FileList;
    
                var deltaMap = CreateDeltaMap(keyword);
    
                return fileList.Where(filePath => Match(ReadContent(filePath), deltaMap, keyword, token)).ToArray();
            }
        }
    }
    View Code

    Trie.cs

    using System.Threading;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    
    namespace Finder.Algorithms
    {
        class Trie : SearchBase
        {
            class Node
            {
                public Dictionary<char, Node> Next { get; private set; }
                public Dictionary<int, bool> FileIndex { get; private set; }
    
                public Node()
                {
                    FileIndex = new Dictionary<int, bool>();
                    Next = new Dictionary<char, Node>();
                }
            }
    
            private static readonly HashSet<char> Ignorance = new HashSet<char>
            {
                '[',
                '(',
                '{',
            };
    
            private static readonly HashSet<char> Spliters = new HashSet<char>
            {
                ' ',
                ',',
                '
    ',
                '
    ',
                ';',
                ']',
                ')',
                '}',
            };
    
            private Node _root;
    
    
            protected override void Build(CancellationToken token)
            {
                _root = new Node();
    
                var index = 0;
                foreach (var filePath in FileList)
                {
                    token.ThrowIfCancellationRequested();
                    Build(index++, ReadContent(filePath));
                }
            }
    
            private void Build(int fileIndex, string content)
            {
                var node = _root;
                foreach (var t in content)
                {
                    var c = t;
    
                    if (Ignorance.Contains(c))
                    {
                        continue;
                    }
                    if (Spliters.Contains(c))
                    {
                        node.FileIndex[fileIndex] = true;
                        node = _root;
                        continue;
                    }
    
                    /*if (!Char.IsLetter(c))
                        continue;*/
    
                    if (Char.IsUpper(c))
                        c = Char.ToLower(c);
    
                    Node next;
                    if (!node.Next.TryGetValue(c, out next))
                    {
                        next = new Node();
                        node.Next[c] = next;
                    }
                    node = next;
                    node.FileIndex[fileIndex] = false;
                }
                node.FileIndex[fileIndex] = true;
            }
    
            private string[] Search(string keyword, bool matchWholeWord, CancellationToken token)
            {
                var node = _root;
                foreach (var t in keyword)
                {
                    token.ThrowIfCancellationRequested();
                    var c = t;
                    if (Char.IsUpper(c))
                        c = Char.ToLower(c);
    
                    Node next;
                    if (!node.Next.TryGetValue(c, out next))
                    {
                        return new string[0];
                    }
                    node = next;
                }
    
                if (matchWholeWord)
                {
                    return node.FileIndex.Where(_ => _.Value).Select(_ => FileList[_.Key]).ToArray();
                }
                return node.FileIndex.Select(_ => FileList[_.Key]).ToArray();
            }
    
            public const string ConfigMatchWholeWord = "ConfigMatchWholeWord";
    
            public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
            {
                var matchWholeWord = (bool)config[ConfigMatchWholeWord];
                return Search(keyword, matchWholeWord, token);
            }
        }
    }
    View Code
  • 相关阅读:
    BeyondLinux_Since1991
    TED系列:我们究竟在教AI学习什么
    TED系列:算法的影响
    TED系列:代码-下一代的通用语言
    TED系列:互联网源起故事
    Hadoop-01 搭建hadoop伪分布式运行环境
    JavaSE-26 Swing
    JavaSE-25 AWT
    JavaSE-24 多线程
    JavaSE-23 注解
  • 原文地址:https://www.cnblogs.com/ornithopter/p/3812497.html
Copyright © 2011-2022 走看看