zoukankan      html  css  js  c++  java
  • 用 MySQL 数据库实现的分词算法代码

    这是我辛苦了几天才整理好的代码,拿来和大家分享一下,希望可以帮助到大家。以下分为四步,每一步都有注释说明。

        #region  一.先从article表里查询数据
        /// <summary>
        /// Step 1: loads every row of the <c>article</c> table and runs word segmentation
        /// (<see cref="fencistr"/>) on each row whose timestamp is more than one day old.
        /// </summary>
        /// <remarks>
        /// Columns are read by position: index 1 is used as the title, index 3 as the body
        /// and index 4 as the timestamp, so the result depends on the column order that
        /// <c>select *</c> returns. The <c>id</c> column is read by name.
        /// </remarks>
        public void fenciBind()
        {
            string sql = "select * from article;";
            string str = ConfigurationManager.ConnectionStrings["ConnectionString"].ToString();
            DataTable dt = new DataTable();

            // The table is fully buffered into memory, so the connection is released
            // before any row is processed. Previously it was only closed inside the
            // loop, which leaked it whenever no row qualified.
            using (MySqlConnection con = new MySqlConnection(str))
            using (MySqlDataAdapter msda = new MySqlDataAdapter(sql, con))
            {
                con.Open();
                msda.Fill(dt);
            }

            foreach (DataRow row in dt.Rows)
            {
                // Title and body are concatenated and segmented as one text.
                string strcon = row[1].ToString() + row[3].ToString();
                DateTime strtime = Convert.ToDateTime(row[4]);

                // Rows stamped within the last day are skipped.
                if (strtime < DateTime.Now.AddDays(-1))
                {
                    string artsrt = row["id"].ToString();
                    fencistr(strcon, artsrt);
                }
            }
        }
        #endregion
    
        #region  二.article表里的数据进行分词
        /// <summary>
        /// Step 2: splits article text into words with <c>Lucene.China.ChineseAnalyzer</c>
        /// and forwards the resulting comma-separated word list to <see cref="pinyinstr"/>.
        /// </summary>
        /// <param name="strcon">Title and body text of the article to segment. <c>null</c> is treated as empty text.</param>
        /// <param name="artsrt">Value of the article's <c>id</c> column.</param>
        /// <returns>The <paramref name="strcon"/> argument, unchanged.</returns>
        public string fencistr(string strcon, string artsrt)
        {
            // Hoisted out of the token loop; the separator never changes.
            char[] separator = { ',' };
            StringBuilder sb = new StringBuilder();
            Analyzer analyzer = new Lucene.China.ChineseAnalyzer();

            using (StringReader sr = new StringReader(strcon ?? string.Empty))
            {
                TokenStream stream = analyzer.TokenStream(null, sr);
                for (Token t = stream.Next(); t != null; t = stream.Next())
                {
                    // Token.ToString() is expected to look like "(word,0,2)"; drop the
                    // opening parenthesis and keep the text before the first comma.
                    string word = t.ToString().Replace("(", "").Split(separator)[0];

                    // Every word is prefixed with a comma, so the list starts with an
                    // empty entry; the later steps begin reading at index 1.
                    sb.Append(',').Append(word);
                }
            }

            // Step 3: convert the words to pinyin. The same list is passed twice: once as
            // the text to convert and once as the keywords to store.
            string words = sb.ToString();
            pinyinstr(words, words, artsrt);
            return strcon;
        }
        #endregion
    
        #region 三.汉字转换拼音
        /// <summary>
        /// Step 3: converts a comma-separated word list into a comma-separated pinyin list
        /// and passes it to <see cref="Opestr"/>.
        /// </summary>
        /// <remarks>
        /// Each comma in the input produces exactly one comma in the output, so entry
        /// <c>i</c> of the pinyin list corresponds to entry <c>i</c> of the word list.
        /// Characters that <c>ChineseChar.IsValidChar</c> rejects contribute nothing.
        /// Previously each such character emitted an extra separator, which shifted
        /// every following pinyin entry away from its word. For every accepted
        /// character, all readings that pass the tone filter are appended back to back.
        /// Trailing commas are trimmed from the result.
        /// </remarks>
        /// <param name="sb">Comma-separated words to convert to pinyin.</param>
        /// <param name="sbstr">Comma-separated words to store in the <c>keywords</c> table.</param>
        /// <param name="artsrt">Value of the article's <c>id</c> column.</param>
        /// <returns>
        /// The comma-separated pinyin list, or an empty string when <paramref name="sb"/>
        /// is null or empty (in which case <see cref="Opestr"/> is not called).
        /// </returns>
        public string pinyinstr(string sb, string sbstr, string artsrt)
        {
            // Empty input used to leave the accumulator null and crash on Replace().
            if (string.IsNullOrEmpty(sb))
            {
                return string.Empty;
            }

            StringBuilder pinyin = new StringBuilder();
            foreach (char c in sb)
            {
                if (c == ',')
                {
                    pinyin.Append(',');
                    continue;
                }

                if (!ChineseChar.IsValidChar(c))
                {
                    continue;
                }

                ChineseChar chineseChar = new ChineseChar(c);
                for (int k = 0; k < chineseChar.PinyinCount; k++)
                {
                    // Invariant lower-casing keeps the result independent of the
                    // current culture (e.g. Turkish dotless i).
                    string reading = chineseChar.Pinyins[k].ToString().ToLowerInvariant();

                    // Keep readings whose last character sorts before '5'.
                    // NOTE(review): presumably this drops neutral-tone readings marked
                    // with a trailing 5 - confirm against the converter's output.
                    if (reading.Length > 0 && reading[reading.Length - 1].CompareTo('5') < 0)
                    {
                        pinyin.Append(reading);
                    }
                }
            }

            string pystr = pinyin.ToString().TrimEnd(',');

            // Step 4: create the pinyin tables and store the keywords.
            Opestr(pystr, sbstr, artsrt);
            return pystr;
        }
        #endregion
    
        #region 四.创建拼音数据表并添加数据 同时往keywords表里添加数据
        /// <summary>
        /// Step 4: for every pinyin entry, records the matching keyword in the
        /// <c>keywords</c> table, makes sure a table named after the pinyin exists, and
        /// links the article to that table.
        /// </summary>
        /// <remarks>
        /// Both lists are read from index 1, because the lists built by the earlier steps
        /// start with a comma and therefore with an empty entry. A keyword is inserted only
        /// when no <c>keywords</c> row already has exactly the same pinyin. An article id is
        /// inserted into a pinyin table only when that table has no row with the same
        /// <c>article_id</c>; whenever such a row is inserted, the article's
        /// <c>update_time</c> is set to the current time. Entries that are empty or contain
        /// anything other than letters and digits are skipped, because the entry is used
        /// as a table name.
        /// </remarks>
        /// <param name="pystr">Comma-separated pinyin list; each entry names one table.</param>
        /// <param name="sbstr">Comma-separated segmented words, index-aligned with <paramref name="pystr"/>.</param>
        /// <param name="artsrt">Value of the article's <c>id</c> column; must be an integer.</param>
        /// <exception cref="ArgumentException"><paramref name="artsrt"/> is not an integer.</exception>
        public void Opestr(string pystr, string sbstr, string artsrt)
        {
            if (string.IsNullOrEmpty(pystr))
            {
                return;
            }

            string[] PinYins = pystr.Trim().Split(',');
            string[] hzstr = (sbstr ?? string.Empty).Trim().Split(',');
            if (PinYins.Length < 2)
            {
                // Only the leading empty entry is present; nothing to store.
                return;
            }

            // The id is sent as a typed parameter instead of being pasted into SQL text.
            long articleId;
            if (!long.TryParse(artsrt, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out articleId))
            {
                throw new ArgumentException("Article id must be an integer.", "artsrt");
            }

            string str = ConfigurationManager.ConnectionStrings["ConnectionString"].ToString();

            // One connection for the whole call, released even when a statement fails.
            using (MySqlConnection con = new MySqlConnection(str))
            {
                con.Open();
                System.Collections.Generic.HashSet<string> knownPinyins = OpestrLoadKeywordPinyins(con);

                for (int i = 1; i < PinYins.Length; i++)
                {
                    string pinyin = PinYins[i];
                    if (!OpestrIsSafeTableName(pinyin))
                    {
                        continue;
                    }

                    // 1. keywords table: store the word that sits at the same index as
                    //    this pinyin, unless that exact pinyin is already recorded.
                    if (i < hzstr.Length && hzstr[i].Length > 0 && knownPinyins.Add(pinyin))
                    {
                        OpestrInsertKeyword(con, hzstr[i], pinyin);
                    }

                    // 2. per-pinyin table: create it if needed and link the article.
                    OpestrIndexArticle(con, pinyin, articleId);
                }
            }
        }

        /// <summary>Reads the <c>pinyin</c> column of every <c>keywords</c> row into a set for exact-match lookups.</summary>
        private static System.Collections.Generic.HashSet<string> OpestrLoadKeywordPinyins(MySqlConnection con)
        {
            System.Collections.Generic.HashSet<string> known = new System.Collections.Generic.HashSet<string>(StringComparer.Ordinal);
            DataTable kwdt = new DataTable();
            using (MySqlDataAdapter kwsmda = new MySqlDataAdapter("select `pinyin` from `hww_article_search`.`keywords`", con))
            {
                kwsmda.Fill(kwdt);
            }

            foreach (DataRow row in kwdt.Rows)
            {
                known.Add(row["pinyin"].ToString());
            }

            return known;
        }

        /// <summary>Returns true when the name is non-empty and consists only of letters and digits, so it cannot break out of a backtick-quoted identifier.</summary>
        private static bool OpestrIsSafeTableName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return false;
            }

            foreach (char c in name)
            {
                if (!char.IsLetterOrDigit(c))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Inserts one keyword/pinyin pair into the <c>keywords</c> table using bound parameters.</summary>
        private static void OpestrInsertKeyword(MySqlConnection con, string keyword, string pinyin)
        {
            string kwsql = "INSERT INTO `hww_article_search`.`keywords` (`keyword`, `pinyin`) VALUES (@keyword, @pinyin);";
            using (MySqlCommand kwcom = new MySqlCommand(kwsql, con))
            {
                kwcom.Parameters.AddWithValue("@keyword", keyword);
                kwcom.Parameters.AddWithValue("@pinyin", pinyin);
                kwcom.ExecuteNonQuery();
            }
        }

        /// <summary>Creates the table named after the pinyin if it does not exist, adds the article id when it is not already present, and then stamps the article's <c>update_time</c>.</summary>
        private static void OpestrIndexArticle(MySqlConnection con, string tableName, long articleId)
        {
            // Table names cannot be bound as parameters; tableName has already been
            // restricted to letters and digits by the caller.
            string table = "`hww_article_search`.`" + tableName + "`";

            string sqlcre = "create table if not exists " + table + " ( `id` int(10) not null auto_increment, `article_id` int(10) unsigned not null,primary key(`id`));";
            using (MySqlCommand com = new MySqlCommand(sqlcre, con))
            {
                com.ExecuteNonQuery();
            }

            // Exact-match lookup in the database. The earlier substring test treated
            // id 1 as present whenever id 12 was stored.
            using (MySqlCommand comif = new MySqlCommand("select count(*) from " + table + " where `article_id`=@articleId;", con))
            {
                comif.Parameters.AddWithValue("@articleId", articleId);
                if (Convert.ToInt64(comif.ExecuteScalar()) > 0)
                {
                    return;
                }
            }

            using (MySqlCommand comadd = new MySqlCommand("insert into " + table + "(`article_id`) values(@articleId);", con))
            {
                comadd.Parameters.AddWithValue("@articleId", articleId);
                comadd.ExecuteNonQuery();
            }

            // After a successful insert, stamp the article with the current time.
            using (MySqlCommand comtime = new MySqlCommand("update `hww_article_search`.`article` set `update_time`=@now where `id`=@articleId;", con))
            {
                comtime.Parameters.AddWithValue("@now", DateTime.Now);
                comtime.Parameters.AddWithValue("@articleId", articleId);
                comtime.ExecuteNonQuery();
            }
        }
        #endregion
    

      运行结果如下图:

     

  • 相关阅读:
    Eclipse导入Ant项目
    Eclipse修改默认包路径的起始文件夹
    Java中DAO/DTO/PO/VO/BO/QO/POJO
    FreeMarker与Spring MVC 4集合的HelloWorld示例
    FreeMarker与Spring MVC 4结合错误:Caused by: java.lang.NoClassDefFoundError: org/springframework/ui/freemarker/FreeMarkerConfiguration
    FreeMarker与Servlet结合示例
    FreeMarker-简单示例
    Java模板引擎-FreeMarker
    SiteMesh2-sitemesh.xml的其它映射器的用法
    SiteMesh2-sitemesh.xml的ParameterDecoratorMapper映射器的用法
  • 原文地址:https://www.cnblogs.com/hww9011/p/3003500.html
Copyright © 2011-2022 走看看