zoukankan      html  css  js  c++  java
  • 中文分词 新建索引 更新索引

      /// <summary>
      /// Builds and incrementally updates a Lucene index (analyzed with <c>ChineseAnalyzer</c>)
      /// from the activity, line, place, journey, member and company tables.
      /// The highest row ID indexed per table (the "watermark") is persisted in an
      /// <c>index.xml</c> file inside the index directory so that the next run only
      /// picks up newer rows.
      /// </summary>
      class ChinaIndex
      {
          // Highest row ID already indexed for each table. Kept as strings because they are
          // round-tripped through index.xml; they are parsed to integers before use in SQL.
          private string activityIndexId = "0", lineIndexId = "0", poiIndexId = "0", journeyIndexId = "0", memberIndexId = "0", companyIndexId = "0";

          /// <summary>
          /// Creates the index when none exists at the configured <c>storePath</c>, otherwise
          /// appends every row whose ID is greater than the stored watermark of its table.
          /// The writer is always closed, and the watermarks reached so far are saved after
          /// the close, even when indexing fails part-way.
          /// </summary>
          public void CreateIndex()
          {
              var settings = new System.Configuration.AppSettingsReader();
              string indexStorePath = settings.GetValue("storePath", typeof(string)).ToString();
              string watermarkPath = indexStorePath + "\\index.xml";

              // Ask Lucene whether an index exists instead of relying on a bare catch:
              // a lock timeout or I/O error must not be mistaken for "no index yet".
              bool createNew = !IndexReader.IndexExists(indexStorePath);
              if (createNew)
              {
                  // A brand-new index contains no documents, so any previously recorded
                  // watermark is meaningless: start every table from the beginning.
                  activityIndexId = lineIndexId = poiIndexId = journeyIndexId = memberIndexId = companyIndexId = "0";
              }
              else
              {
                  ReadXmlIndex(watermarkPath);
              }

              IndexWriter writer = new IndexWriter(indexStorePath, new ChineseAnalyzer(), createNew);
              try
              {
                  IndexActivities(writer);
                  IndexLines(writer);
                  IndexPlaces(writer);
                  IndexJourneys(writer);
                  IndexMembers(writer);
                  IndexCompanies(writer);
                  writer.Optimize();
              }
              finally
              {
                  // Close first; the watermarks are only written once the close succeeded,
                  // so the file never claims rows the writer failed to persist.
                  writer.Close();
                  SetIndexFile(watermarkPath);
              }
          }

          /// <summary>Indexes activities newer than <c>activityIndexId</c>.</summary>
          private void IndexActivities(IndexWriter writer)
          {
              const string sql = "select a.name,a.userName,a.way,a.tags,b.name, a.ID,a.userPic,a.pic,a.summary,a.comentCount,a.upCount,a.memberID from dbo.MY_Activity a left join  dbo.MY_Area b on a.areaID=b.ID where a.ID>@lastId order by a.ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, activityIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddTokenized(doc, "ActiviyName", row[0].ToString());
                      AddStoredOnly(doc, "ActiviyUserName", row[1].ToString());
                      AddTokenized(doc, "ActiviyWay", row[2].ToString());
                      AddTokenized(doc, "ActiviyTags", row[3].ToString());
                      AddTokenized(doc, "ActiviyAddress", row[4].ToString());
                      AddStoredOnly(doc, "ActiviyID", row[5].ToString());
                      AddStoredOnly(doc, "ActiviyUserPic", row[6].ToString().Replace(".jpg", "_50X50.jpg"));
                      AddStoredOnly(doc, "ActiviyPic", row[7].ToString() + "-cover.jpg");
                      AddStoredOnly(doc, "ActiviySummary", row[8].ToString());
                      AddStoredOnly(doc, "ActiviyComentCount", row[9].ToString());
                      AddStoredOnly(doc, "ActiviyUpCount", row[10].ToString());
                      AddStoredOnly(doc, "ActiviyMemerId", row[11].ToString());
                      writer.AddDocument(doc);
                      // Advance the watermark only after the document was accepted.
                      activityIndexId = row[5].ToString();
                  }
              }
          }

          /// <summary>Indexes travel lines newer than <c>lineIndexId</c>.</summary>
          private void IndexLines(IndexWriter writer)
          {
              const string sql = "select a.name,a.startCity, a.departureDates,a.traffic,a.content,a.price,a.days,a.travelType,a.ID,a.tags,a.price1,a.priceMinor,a.pic,b.name,a.companyID from dbo.MY_Line a left join MY_Member_CompanyDetail b on a.companyID=b.ID where a.ID>@lastId order by a.ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, lineIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddTokenized(doc, "LineName", row[0].ToString());
                      AddTokenized(doc, "LineStartCity", row[1].ToString());
                      // Column 2 (departureDates) is selected but not indexed.
                      AddTokenized(doc, "LineTraffic", row[3].ToString());
                      AddTokenized(doc, "LineContent", row[4].ToString());
                      // Price and days are zero-padded to a fixed width and stored untokenized.
                      AddKeyword(doc, "LinePrcie", UpdateString(DropLastThreeChars(row[5].ToString()), 8));
                      AddKeyword(doc, "LineDays", UpdateString(row[6].ToString(), 3));

                      // Unknown travel-type codes add no field at all.
                      switch (row[7].ToString())
                      {
                          case "1": AddTokenized(doc, "LineTravelType", "跟团游"); break;
                          case "2": AddTokenized(doc, "LineTravelType", "纯玩跟团"); break;
                          case "3": AddTokenized(doc, "LineTravelType", "自驾游"); break;
                      }

                      AddStoredOnly(doc, "LineId", row[8].ToString());
                      AddStoredOnly(doc, "LineTags", row[9].ToString());
                      AddStoredOnly(doc, "LinePrice1", UpdateString(DropLastThreeChars(row[10].ToString()), 8));
                      AddStoredOnly(doc, "LinePriceMinor", row[11].ToString());
                      AddStoredOnly(doc, "LinePic", row[12].ToString() + "-cover.jpg");
                      AddTokenized(doc, "LineCompanyName", row[13].ToString());
                      AddStoredOnly(doc, "LineCompanyId", row[14].ToString());
                      writer.AddDocument(doc);
                      lineIndexId = row[8].ToString();
                  }
              }
          }

          /// <summary>Indexes places (POIs) newer than <c>poiIndexId</c>.</summary>
          private void IndexPlaces(IndexWriter writer)
          {
              const string sql = "select a.ID,a.name,a.pic,a.tags,a.address,a.summary,a.themeType,a.status,b.name from MY_Place a left join MY_Area b on a.areaID=b.ID where a.ID>@lastId order by a.ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, poiIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddStoredOnly(doc, "PlaceID", row[0].ToString());
                      AddTokenized(doc, "PlaceName", row[1].ToString());
                      AddStoredOnly(doc, "PlacePic", row[2].ToString());
                      AddTokenized(doc, "PlaceTags", row[3].ToString());
                      AddTokenized(doc, "PlaceAddress", row[4].ToString());
                      AddTokenized(doc, "PlaceSummary", row[5].ToString());

                      string theme = "";
                      switch (row[6].ToString())
                      {
                          case "1": theme = "温泉"; break;
                          case "2": theme = "古镇"; break;
                          case "3": theme = "摄影"; break;
                          case "4": theme = "滑雪"; break;
                          case "5": theme = "海边"; break;
                          case "6": theme = "美食"; break;
                          case "7": theme = "登山"; break;
                          case "8": theme = "主题公园"; break;
                      }
                      AddTokenized(doc, "PlaceThemeType", theme);

                      // Fixed: these cases used to assign to "theme", which left PlaceStatus
                      // permanently empty.
                      string level = "";
                      switch (row[7].ToString())
                      {
                          case "1": level = "5A级"; break;
                          case "2": level = "4A级"; break;
                          case "3": level = "3A级"; break;
                          case "4": level = "2A级"; break;
                      }
                      AddTokenized(doc, "PlaceStatus", level);

                      AddTokenized(doc, "PlaceCity", row[8].ToString());
                      writer.AddDocument(doc);
                      poiIndexId = row[0].ToString();
                  }
              }
          }

          /// <summary>Indexes journeys newer than <c>journeyIndexId</c>.</summary>
          private void IndexJourneys(IndexWriter writer)
          {
              const string sql = "Select ID,title,summary,addTime,userName,userPIc,hotAmount,commentAbount,upCount,tags,userid,forwardamount From [MY_Journey] where ID>@lastId order by ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, journeyIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddTokenized(doc, "JourneyID", row[0].ToString());
                      AddTokenized(doc, "JourneyTitle", row[1].ToString());
                      AddTokenized(doc, "JourneySummary", row[2].ToString());
                      AddTokenized(doc, "JourneyAddTime", row[3].ToString());
                      AddTokenized(doc, "JourneyUserName", row[4].ToString());
                      AddStoredOnly(doc, "JourneyUserPIc", row[5].ToString());
                      AddStoredOnly(doc, "JourneyHotAmount", row[6].ToString());
                      AddStoredOnly(doc, "JourneyCommentAbount", row[7].ToString());
                      // Fixed: "tags" is column 9 of the SELECT; column 8 is upCount, which was
                      // previously stored under JourneyTags by mistake.
                      // NOTE(review): upCount (column 8) is selected but has no field of its own.
                      AddStoredOnly(doc, "JourneyTags", row[9].ToString());
                      AddStoredOnly(doc, "JourneyUserid", row[10].ToString());
                      AddStoredOnly(doc, "Journeyforwardamount", row[11].ToString());
                      writer.AddDocument(doc);
                      journeyIndexId = row[0].ToString();
                  }
              }
          }

          /// <summary>Indexes members (GroupID 0) newer than <c>memberIndexId</c>.</summary>
          private void IndexMembers(IndexWriter writer)
          {
              const string sql = "Select ID,nickName,dreams,dreamsPlace,sex,regTime,pic,bideCity From [MY_Member] where  GroupID=0 and (ID  <=10000 or ID>=180000) and ID>@lastId order by ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, memberIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddStoredOnly(doc, "MemberID", row[0].ToString());
                      AddTokenized(doc, "MemberNickName", row[1].ToString());
                      AddTokenized(doc, "MemberDreams", row[2].ToString());
                      AddTokenized(doc, "MemberDreamsPlace", row[3].ToString());
                      AddStoredOnly(doc, "MemberSex", row[4].ToString());
                      AddStoredOnly(doc, "MemberRegTime", row[5].ToString());
                      AddStoredOnly(doc, "MemberPic", row[6].ToString());
                      AddStoredOnly(doc, "MemberBideCity", row[7].ToString());
                      writer.AddDocument(doc);
                      memberIndexId = row[0].ToString();
                  }
              }
          }

          /// <summary>Indexes company details (owner GroupID 1) newer than <c>companyIndexId</c>.</summary>
          private void IndexCompanies(IndexWriter writer)
          {
              const string sql = "select b.name,a.ID,a.name,a.content,a.banner,a.rank,a.status,a.tel,a.memberID from MY_Member_CompanyDetail a left join MY_Area b on a.areaID=b.ID left join MY_Member c on  a.memberID=c.ID where c.GroupID=1 and a.ID>@lastId order by a.ID asc";
              using (SqlDataReader row = ExecuteIncrementalQuery(sql, companyIndexId))
              {
                  while (row.Read())
                  {
                      Document doc = new Document();
                      AddStoredOnly(doc, "MemberCompanyAreaName", row[0].ToString());
                      AddStoredOnly(doc, "MemberCompanyID", row[1].ToString());
                      AddTokenized(doc, "MemberCompanyName", row[2].ToString());
                      AddTokenized(doc, "MemberCompanyContent", row[3].ToString());
                      AddStoredOnly(doc, "MemberCompanyBanner", row[4].ToString());
                      AddStoredOnly(doc, "MemberCompanyRank", row[5].ToString());
                      AddStoredOnly(doc, "MemberCompanyStatus", row[6].ToString());
                      AddStoredOnly(doc, "MemberCompanyTel", row[7].ToString());
                      AddStoredOnly(doc, "MemberCompanyMemberID", row[8].ToString());
                      writer.AddDocument(doc);
                      companyIndexId = row[1].ToString();
                  }
              }
          }

          /// <summary>Adds a stored field that is analyzed (tokenized) for full-text search.</summary>
          private static void AddTokenized(Document doc, string name, string value)
          {
              doc.Add(new Field(name, value, Field.Store.YES, Field.Index.TOKENIZED));
          }

          /// <summary>Adds a stored field that is indexed as one untokenized term.</summary>
          private static void AddKeyword(Document doc, string name, string value)
          {
              doc.Add(new Field(name, value, Field.Store.YES, Field.Index.UN_TOKENIZED));
          }

          /// <summary>Adds a field that is stored for retrieval but not indexed.</summary>
          private static void AddStoredOnly(Document doc, string name, string value)
          {
              doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NO));
          }

          /// <summary>
          /// Removes the last three characters of <paramref name="text"/>.
          /// Strings shorter than three characters are returned unchanged instead of throwing.
          /// </summary>
          /// <remarks>
          /// NOTE(review): presumably strips a ".00"-style suffix from the price column;
          /// confirm against the column's actual type and formatting.
          /// </remarks>
          private static string DropLastThreeChars(string text)
          {
              return text.Length >= 3 ? text.Remove(text.Length - 3) : text;
          }

          /// <summary>
          /// Builds the value of a field used in range queries by left-padding it with zeros.
          /// </summary>
          /// <param name="str">The raw value; null or empty values are returned as-is.</param>
          /// <param name="length">The minimum width of the result.</param>
          /// <returns><paramref name="str"/> left-padded with '0' up to <paramref name="length"/> characters.</returns>
          private string UpdateString(string str, int length)
          {
              return string.IsNullOrEmpty(str) ? str : str.PadLeft(length, '0');
          }

          /// <summary>
          /// Runs a query containing an <c>@lastId</c> parameter, bound to the given watermark.
          /// </summary>
          /// <param name="sqlStr">SQL text referencing <c>@lastId</c>.</param>
          /// <param name="lastIndexedId">The watermark; a non-numeric value is treated as 0.</param>
          /// <remarks>Assumes the ID columns are integers (the watermark default is "0") - TODO confirm.</remarks>
          private SqlDataReader ExecuteIncrementalQuery(string sqlStr, string lastIndexedId)
          {
              long lastId;
              if (!TryParseId(lastIndexedId, out lastId))
              {
                  lastId = 0L;
              }

              SqlParameter parameter = new SqlParameter("@lastId", SqlDbType.BigInt);
              parameter.Value = lastId;
              return ExecuteQuery(sqlStr, parameter);
          }

          /// <summary>
          /// Executes <paramref name="sqlStr"/> on a new connection built from the
          /// <c>connectionStr</c> app setting and returns the open reader.
          /// </summary>
          /// <param name="sqlStr">The SQL text to execute.</param>
          /// <param name="parameters">Optional parameters referenced by the SQL text.</param>
          /// <returns>A reader that owns its connection: closing or disposing it closes the connection.</returns>
          private SqlDataReader ExecuteQuery(string sqlStr, params SqlParameter[] parameters)
          {
              var settings = new System.Configuration.AppSettingsReader();
              string connstr = settings.GetValue("connectionStr", typeof(string)).ToString();
              SqlConnection con = new SqlConnection(connstr);
              try
              {
                  con.Open();
                  SqlCommand com = new SqlCommand(sqlStr, con);
                  if (parameters != null && parameters.Length > 0)
                  {
                      com.Parameters.AddRange(parameters);
                  }
                  // CloseConnection ties the connection's lifetime to the reader, so the
                  // caller's using-block releases both.
                  return com.ExecuteReader(CommandBehavior.CloseConnection);
              }
              catch
              {
                  // No reader was handed out, so nobody else can close the connection.
                  con.Dispose();
                  throw;
              }
          }

          /// <summary>
          /// Records the highest ID indexed so far for every table.
          /// </summary>
          /// <param name="path">Path of the watermark XML file to (over)write.</param>
          private void SetIndexFile(string path)
          {
              XElement index = new XElement("index",
                  new XAttribute("id", "01"),
                  new XElement("activityIndexId", activityIndexId),
                  new XElement("lineIndexId", lineIndexId),
                  new XElement("poiIndexId", poiIndexId),
                  new XElement("journeyIndexId", journeyIndexId),
                  new XElement("memberIndexId", memberIndexId),
                  new XElement("companyIndexId", companyIndexId));

              new XDocument(new XElement("Indexs", index)).Save(path);
          }

          /// <summary>
          /// Loads the per-table watermarks from the XML file written by <c>SetIndexFile</c>.
          /// This is best-effort: a missing or unreadable file, or a missing or non-numeric
          /// element, leaves the corresponding watermark at its current value.
          /// </summary>
          /// <param name="filename">Path of the watermark XML file.</param>
          public void ReadXmlIndex(string filename)
          {
              // First run: nothing has been recorded yet.
              if (!System.IO.File.Exists(filename))
              {
                  return;
              }

              XmlDocument xmlDoc = new XmlDocument();
              try
              {
                  xmlDoc.Load(filename);
              }
              catch (Exception e)
              {
                  // Still best-effort, but no longer silent.
                  System.Diagnostics.Trace.TraceWarning("Could not read index watermark file '" + filename + "': " + e.Message);
                  return;
              }

              XmlNode root = xmlDoc.SelectSingleNode("//index");
              if (root == null)
              {
                  return;
              }

              activityIndexId = ReadWatermark(root, "activityIndexId", activityIndexId);
              lineIndexId = ReadWatermark(root, "lineIndexId", lineIndexId);
              poiIndexId = ReadWatermark(root, "poiIndexId", poiIndexId);
              journeyIndexId = ReadWatermark(root, "journeyIndexId", journeyIndexId);
              memberIndexId = ReadWatermark(root, "memberIndexId", memberIndexId);
              companyIndexId = ReadWatermark(root, "companyIndexId", companyIndexId);
          }

          /// <summary>
          /// Reads one integer watermark element below <paramref name="root"/>; returns
          /// <paramref name="fallback"/> when the element is missing or not an integer.
          /// </summary>
          private static string ReadWatermark(XmlNode root, string elementName, string fallback)
          {
              XmlNode node = root.SelectSingleNode(elementName);
              long id;
              if (node != null && TryParseId(node.InnerText, out id))
              {
                  return id.ToString(System.Globalization.CultureInfo.InvariantCulture);
              }
              return fallback;
          }

          /// <summary>Parses an ID culture-independently, tolerating surrounding whitespace.</summary>
          private static bool TryParseId(string text, out long id)
          {
              return long.TryParse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out id);
          }
      }
    View Code
  • 相关阅读:
    HDU ACM 1020 Encoding
    HDU ACM 1019 Least Common Multiple
    HDU ACM 1009 FatMouse' Trade
    HDU ACM 1032 The 3n + 1 problem
    HD ACM 1061 Rightmost Digit
    UVa 401 Palindromes
    UVa 489 Hangman Judge
    HDU ACM 1071 The area
    5/25
    受涼6/8
  • 原文地址:https://www.cnblogs.com/lx0551/p/3103034.html
Copyright © 2011-2022 走看看