zoukankan      html  css  js  c++  java
  • 基于KWIC 的keyword匹配算法(管道+过滤器模式下实现)

    以下是基于KWIC 的keyword匹配算法(管道+过滤器模式下实现)

    关键部分的管道+过滤器 软件体系下的实现, 在非常多的keyword搜索平台都使用了这一 循环移位+排序输出的 keyword匹配算法:

    详细需求例如以下:

    1、使用管道-过滤器风格:
    每一个过滤器处理数据,然后将结果送至下一个过滤器。

    只要有数据传入,过滤器即开始工作。

    过滤器之间的数据共享被严格限制在管道传输
    四个过滤器:
    输入(Input filter):
    从数据源读取输入文件,解析格式,将行写入输出管道
    移位(CircularShifter filter):循环移位
    排序(Alphabetizer filter):
    输出(Output filter)
    管道:
      in_cs pipe
      cs_al pipe
      al_ou pipe

    比如:


    代码例如以下:

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    
    namespace KWIC
    {
        /// <summary>
        /// A pipe: a single-slot holder that carries one list of lines from
        /// the filter that writes it to the filter that reads it.
        /// </summary>
        public class Pipe
        {
            // The list most recently handed to write(); null until the first write.
            private List<string> _payload;

            /// <summary>
            /// Stores the given list in the pipe, replacing whatever was there.
            /// The list is stored by reference, not copied.
            /// </summary>
            /// <param name="word">Lines to place in the pipe.</param>
            public void write(List<string> word)
            {
                _payload = word;
            }

            /// <summary>
            /// Returns the list currently held by the pipe.
            /// </summary>
            /// <returns>
            /// The same list instance that was last written, or null if nothing
            /// has been written yet.
            /// </returns>
            public List<string> read()
            {
                return _payload;
            }
        }
    
        /// <summary>
        /// Common base type for the filters (processing stages) that sit
        /// between pipes.
        /// </summary>
        public abstract class Filter
        {
            /// <summary>
            /// Performs this filter's processing step.
            /// The base implementation intentionally does nothing.
            /// </summary>
            public virtual void Transform()
            {
            }
        }
    
        /// <summary>
        /// Source filter of the pipeline: hands a caller-supplied list of
        /// lines to its output pipe.
        /// </summary>
        public class InputFilter : Filter
        {
            /// <summary>Pipe that receives the lines when Transform runs.</summary>
            public Pipe outPipe;

            /// <summary>Lines that this filter feeds into the pipeline.</summary>
            public List<string> word;

            /// <summary>
            /// Creates the input filter.
            /// </summary>
            /// <param name="word">Lines to feed into the pipeline.</param>
            /// <param name="outPipe">Pipe the lines are written to.</param>
            public InputFilter(List<string> word, Pipe outPipe)
            {
                this.word = word;
                this.outPipe = outPipe;
            }

            /// <summary>
            /// Writes the stored list into the output pipe.
            /// </summary>
            /// <remarks>
            /// Declared with <c>override</c> so that a call made through a
            /// <see cref="Filter"/> reference reaches this implementation
            /// instead of the empty base method.
            /// </remarks>
            public override void Transform()
            {
                outPipe.write(word);
            }
        }
    
        /// <summary>
        /// Circular-shift filter: for every input line it produces all word
        /// rotations of that line and passes the original lines plus the
        /// rotations on to the next pipe.
        /// </summary>
        public class CircleShiftFilter : Filter
        {
            /// <summary>Pipe the lines are read from.</summary>
            public Pipe inputPipe;

            /// <summary>Pipe the original and shifted lines are written to.</summary>
            public Pipe outPipe;

            /// <summary>
            /// Creates the circular-shift filter.
            /// </summary>
            /// <param name="inputPipe">Pipe to read lines from.</param>
            /// <param name="outPipe">Pipe to write the result to.</param>
            public CircleShiftFilter(Pipe inputPipe, Pipe outPipe)
            {
                this.inputPipe = inputPipe;
                this.outPipe = outPipe;
            }

            /// <summary>
            /// Reads the lines from the input pipe and writes a new list to the
            /// output pipe containing every original line (in input order)
            /// followed by the circular shifts of each line.
            /// </summary>
            /// <remarks>
            /// A line is split on single space characters. A line of n tokens
            /// contributes n - 1 shifts: first the line with its last token
            /// moved to the front, then the last two tokens moved to the front,
            /// and so on. A line with a single token contributes no shifts.
            /// The list held by the input pipe is not modified.
            /// </remarks>
            /// <exception cref="InvalidOperationException">
            /// The input pipe holds no list (nothing has been written to it).
            /// </exception>
            public override void Transform()
            {
                List<string> word = inputPipe.read();
                if (word == null)
                {
                    throw new InvalidOperationException("CircleShiftFilter: the input pipe is empty.");
                }

                // Build the result in a fresh list. Appending to the list read
                // from the pipe would change the upstream data, so running the
                // pipeline a second time would shift already shifted lines.
                List<string> result = new List<string>(word);

                foreach (string line in word)
                {
                    // Split once and rotate the token array, rather than
                    // re-splitting the previously built string on every step.
                    string[] tokens = line.Split(' ');
                    int tokenCount = tokens.Length;

                    for (int shift = 1; shift < tokenCount; shift++)
                    {
                        // The last `shift` tokens move to the front; the
                        // remaining leading tokens follow them.
                        int pivot = tokenCount - shift;
                        string moved = string.Join(" ", tokens, pivot, shift);
                        string kept = string.Join(" ", tokens, 0, pivot);
                        result.Add(moved + " " + kept);
                    }
                }

                outPipe.write(result);
            }
        }
    
        /// <summary>
        /// Sorting filter: passes the lines from its input pipe to its output
        /// pipe in sorted order.
        /// </summary>
        public class AlphaFilter : Filter
        {
            /// <summary>Pipe the unsorted lines are read from.</summary>
            public Pipe inputPipe;

            /// <summary>Pipe the sorted lines are written to.</summary>
            public Pipe outPipe;

            /// <summary>
            /// Creates the sorting filter.
            /// </summary>
            /// <param name="inputPipe">Pipe to read lines from.</param>
            /// <param name="outPipe">Pipe to write the sorted lines to.</param>
            public AlphaFilter(Pipe inputPipe, Pipe outPipe)
            {
                this.inputPipe = inputPipe;
                this.outPipe = outPipe;
            }

            /// <summary>
            /// Reads the lines from the input pipe, sorts a copy of them with
            /// the default string ordering of <c>List&lt;string&gt;.Sort()</c>
            /// and writes the sorted copy to the output pipe.
            /// </summary>
            /// <remarks>
            /// The list held by the input pipe is left in its original order.
            /// </remarks>
            /// <exception cref="InvalidOperationException">
            /// The input pipe holds no list (nothing has been written to it).
            /// </exception>
            public override void Transform()
            {
                List<string> word = inputPipe.read();
                if (word == null)
                {
                    throw new InvalidOperationException("AlphaFilter: the input pipe is empty.");
                }

                // Sort a copy so the upstream stage's list is not reordered
                // behind its back.
                List<string> sorted = new List<string>(word);
                sorted.Sort();

                outPipe.write(sorted);
            }
        }
    
        /// <summary>
        /// Output filter: final stage that forwards the list from its input
        /// pipe to its output pipe unchanged. It does not print anything
        /// itself; the consumer reads the result from the output pipe.
        /// </summary>
        public class OutputFilter : Filter
        {
            /// <summary>Pipe the finished lines are read from.</summary>
            public Pipe inputPipe;

            /// <summary>Pipe the finished lines are forwarded to.</summary>
            public Pipe outPipe;

            /// <summary>
            /// Creates the output filter.
            /// </summary>
            /// <param name="inputPipe">Pipe to read the finished lines from.</param>
            /// <param name="outPipe">Pipe the lines are forwarded to.</param>
            public OutputFilter(Pipe inputPipe, Pipe outPipe)
            {
                this.inputPipe = inputPipe;
                this.outPipe = outPipe;
            }

            /// <summary>
            /// Forwards the list held by the input pipe to the output pipe.
            /// The same list instance is passed on; no copy is made.
            /// </summary>
            /// <remarks>
            /// Declared with <c>override</c> so that a call made through a
            /// <see cref="Filter"/> reference reaches this implementation
            /// instead of the empty base method.
            /// </remarks>
            public override void Transform()
            {
                List<string> word = inputPipe.read();
                outPipe.write(word);
            }
        }
    
        /// <summary>
        /// Wires the four filters and the pipes between them into one KWIC
        /// pipeline and runs it.
        /// </summary>
        public class KWIC_System
        {
            // Matches any run of whitespace (spaces, tabs, line breaks, ...)
            // so that it can be collapsed to a single space. The backslash
            // matters: the pattern "s+" would match runs of the letter 's'.
            private static readonly Regex WhitespaceRun = new Regex(@"\s+");

            Pipe in_cs; // input filter          -> circular-shift filter
            Pipe cs_al; // circular-shift filter -> sorting filter
            Pipe al_ou; // sorting filter        -> output filter
            Pipe ou_ui; // output filter         -> caller of GetResult
            InputFilter inputFilter;
            CircleShiftFilter shifter;
            AlphaFilter alpha;
            OutputFilter output;

            /// <summary>
            /// Builds the pipeline over the built-in sample lines
            /// "I love you", "me too" and "do you know".
            /// </summary>
            public KWIC_System()
                : this(new[] { "I love you", "me too", "do you know" })
            {
            }

            /// <summary>
            /// Builds the pipeline over the given lines.
            /// </summary>
            /// <param name="lines">
            /// Input lines. Each line is trimmed and every run of whitespace
            /// inside it is replaced by one space before it enters the pipeline.
            /// </param>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="lines"/> is null.
            /// </exception>
            public KWIC_System(IEnumerable<string> lines)
            {
                if (lines == null)
                {
                    throw new ArgumentNullException("lines");
                }

                in_cs = new Pipe();
                cs_al = new Pipe();
                al_ou = new Pipe();
                ou_ui = new Pipe();

                List<string> word = new List<string>();
                foreach (string line in lines)
                {
                    // Normalise spacing so that the word split on ' ' done
                    // downstream sees exactly one separator between words.
                    word.Add(WhitespaceRun.Replace(line.Trim(), " "));
                }

                inputFilter = new InputFilter(word, in_cs);
                shifter = new CircleShiftFilter(in_cs, cs_al);
                alpha = new AlphaFilter(cs_al, al_ou);
                output = new OutputFilter(al_ou, ou_ui);
            }

            /// <summary>
            /// Runs the filters in pipeline order (input, circular shift,
            /// sort, output) and returns what the last pipe holds.
            /// </summary>
            /// <returns>The list read from the final pipe.</returns>
            public List<string> GetResult()
            {
                inputFilter.Transform();
                shifter.Transform();
                alpha.Transform();
                output.Transform();

                return ou_ui.read();
            }
        }
    
    }
    

    (备注:如果想让结果换行输出,需要在输出的每一行结尾加上 '\n')

    在各种搜索技术中,这个keyword匹配算法的应用范围其实非常广,比如我们常见的Baidu和Google的搜索keyword提示功能。
    
    
    
    
    
    
    
    

    个人论坛:http://itpark.sinaapp.com/
  • 相关阅读:
    164 Maximum Gap 最大间距
    162 Find Peak Element 寻找峰值
    160 Intersection of Two Linked Lists 相交链表
    155 Min Stack 最小栈
    154 Find Minimum in Rotated Sorted Array II
    153 Find Minimum in Rotated Sorted Array 旋转数组的最小值
    152 Maximum Product Subarray 乘积最大子序列
    151 Reverse Words in a String 翻转字符串里的单词
    bzoj3994: [SDOI2015]约数个数和
    bzoj 4590: [Shoi2015]自动刷题机
  • 原文地址:https://www.cnblogs.com/yutingliuyl/p/6698605.html
Copyright © 2011-2022 走看看