zoukankan      html  css  js  c++  java
  • jieba分词/jieba-analysis(java版)

    简介

    支持分词模式
    Search模式,用于对用户查询词分词
    Index模式,用于对索引文档分词
    特性
    支持多种分词模式
    全角统一转成半角
    用户词典功能
    conf 目录有整理的搜狗细胞词库
    因为性能原因,最新的快照版本去除词性标注,也希望有更好的 Pull Request 可以提供该功能。

    简单使用

    获取jieba-analysis

    <dependency>
      <groupId>com.huaban</groupId>
      <artifactId>jieba-analysis</artifactId>
      <version>1.0.2</version>
    </dependency>

    案例

    复制代码
    /**
     * Runs the segmenter over a few sample sentences in INDEX mode and prints
     * the resulting token list for each sentence on its own line.
     */
    @Test
    public void testDemo() {
        JiebaSegmenter jieba = new JiebaSegmenter();
        String[] samples = {
            "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
            "我不喜欢日本和服。",
            "雷猴回归人间。",
            "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
            "结果婚的和尚未结过婚的"
        };
        for (int i = 0; i < samples.length; i++) {
            // Render the token list explicitly before printing it.
            String rendered = jieba.process(samples[i], SegMode.INDEX).toString();
            System.out.println(rendered);
        }
    }
    复制代码

    原文链接:https://github.com/huaban/jieba-analysis

    我的应用

    复制代码
    package com.analysis;
    

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.util.List;
    import java.util.UUID;

    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;

    import com.huaban.analysis.jieba.JiebaSegmenter;
    import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
    import com.huaban.analysis.jieba.SegToken;

    public class jiebaTest {

    </span><span style="color: #0000ff;">private</span> Connection con = <span style="color: #0000ff;">null</span><span style="color: #000000;">;
    </span><span style="color: #0000ff;">private</span> PreparedStatement pstmt = <span style="color: #0000ff;">null</span><span style="color: #000000;">;
    
    </span><span style="color: #008000;">/**</span><span style="color: #008000;">
     * 连接
     </span><span style="color: #008000;">*/</span><span style="color: #000000;">
    @Before
    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span> beforeDemo() <span style="color: #0000ff;">throws</span><span style="color: #000000;"> Exception {
        Class.forName(</span>"com.mysql.jdbc.Driver"<span style="color: #000000;">);
        String url </span>= "jdbc:mysql://localhost:3306/test?user=root&amp;password=root"<span style="color: #000000;">;
        con </span>=<span style="color: #000000;"> DriverManager.getConnection(url);
    }
    
    </span><span style="color: #008000;">/**</span><span style="color: #008000;">
     * 分词查询测试
     </span><span style="color: #008000;">*/</span><span style="color: #000000;">
    @Test
    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span> getDemo() <span style="color: #0000ff;">throws</span><span style="color: #000000;"> Exception {
        BufferedReader br </span>= <span style="color: #0000ff;">new</span> BufferedReader(<span style="color: #0000ff;">new</span><span style="color: #000000;"> InputStreamReader(System.in));
        String str </span>=<span style="color: #000000;"> br.readLine();
    
        String sql </span>= "select * from t_jieba where name = ?"<span style="color: #000000;">;
        pstmt </span>=<span style="color: #000000;"> con.prepareStatement(sql);
    
        pstmt.setString(</span>1<span style="color: #000000;">, str);
        ResultSet rs </span>=<span style="color: #000000;"> pstmt.executeQuery();
    
        </span><span style="color: #0000ff;">while</span><span style="color: #000000;"> (rs.next()) {
            System.out.println(rs.getInt(</span>1)+"--"+rs.getString(2)+"--"+rs.getString(3)+"--"+rs.getString(4)+"--"+rs.getString(5<span style="color: #000000;">));
            pstmt.clearParameters();
            String sql1 </span>= "update t_jieba set times = ? where id = ?"<span style="color: #000000;">;
            pstmt </span>=<span style="color: #000000;"> con.prepareStatement(sql1);
            pstmt.setInt(</span>1, 1+ <span style="color: #0000ff;">new</span> Integer(rs.getString(5<span style="color: #000000;">)));
            pstmt.setInt(</span>2, rs.getInt(1<span style="color: #000000;">));
            pstmt.executeUpdate();
        }
        
        rs.close();
        pstmt.close();
    }
    
    </span><span style="color: #008000;">/**</span><span style="color: #008000;">
     * 分词插入测试
     </span><span style="color: #008000;">*/</span><span style="color: #000000;">
    @Test
    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span> addDemo() <span style="color: #0000ff;">throws</span><span style="color: #000000;"> Exception {
        String sql </span>= "insert into t_jieba (name,cid,c_name,times) select ?,?,?,? from DUAL where not EXISTS(select name from t_jieba where name=?)"<span style="color: #000000;">;
        pstmt </span>=<span style="color: #000000;"> con.prepareStatement(sql);
        JiebaSegmenter segmenter </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> JiebaSegmenter();
        String[] sentences </span>= <span style="color: #0000ff;">new</span> String[] { "大话数据结构", "深入浅出设计模式", "JavaEE开发的颠覆者: Spring Boot实战", "java从入门到放弃"<span style="color: #000000;"> };
        </span><span style="color: #0000ff;">for</span><span style="color: #000000;"> (String sentence : sentences) {
            </span><span style="color: #008000;">//</span><span style="color: #008000;">System.out.println(segmenter.process(sentence, SegMode.INDEX).toString());</span>
            String uuid =<span style="color: #000000;"> UUID.randomUUID().toString();
            uuid </span>= uuid.replace("-", ""<span style="color: #000000;">);
            List</span>&lt;SegToken&gt; list =<span style="color: #000000;"> segmenter.process(sentence, SegMode.INDEX);
            </span><span style="color: #0000ff;">for</span><span style="color: #000000;"> (SegToken segToken : list) {
                String name </span>=<span style="color: #000000;"> segToken.word.trim();
                </span><span style="color: #0000ff;">if</span> (name != <span style="color: #0000ff;">null</span> &amp;&amp; !""<span style="color: #000000;">.equals(name)) {
                    pstmt.setString(</span>1<span style="color: #000000;">, segToken.word);
                    pstmt.setString(</span>2<span style="color: #000000;">, uuid);
                    pstmt.setString(</span>3<span style="color: #000000;">, sentence);
                    pstmt.setString(</span>4, "0"<span style="color: #000000;">);
                    pstmt.setString(</span>5<span style="color: #000000;">, segToken.word);
                    pstmt.executeUpdate();
                    pstmt.clearParameters();
                }
            }
        }
        pstmt.close();
        System.out.println(</span>"插入成功!"<span style="color: #000000;">);
    }
    

    }

    复制代码
    原文地址:https://www.cnblogs.com/bky-lzw/p/7799238.html
  • 相关阅读:
    011-通过网络协议解析网络请求-DNS-ARP-TCPIP
    010-HTTP协议
    009-DNS域名解析系统
    008-ICMP协议(网络控制报文协议)
    007-IP报文协议
    007-排序算法-堆排序
    006-排序算法-希尔排序
    007-Linux 查看端口
    005-排序算法-归并排序
    004-排序算法-选择排序
  • 原文地址:https://www.cnblogs.com/jpfss/p/11413791.html
Copyright © 2011-2022 走看看