zoukankan      html  css  js  c++  java
  • pinyin4j的使用

    pinyin4j的使用
     
    pinyin4j是一个功能强大的汉语拼音工具包,主要用于把汉字转换成各种格式、满足不同需求的拼音。下面看看如何使用pinyin4j。
     

    import java.util.HashSet;
    import java.util.Set;

    import net.sourceforge.pinyin4j.PinyinHelper;
    import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
    import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
    import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

    public class PingYingChange {

    /**
    * 获取汉字串拼音首字母,英文字符不变
    *
    * @param chinese 汉字串
    * @return 汉语拼音首字母
    */
    public static String cn2FirstSpell(String chinese) {
    StringBuffer pybf = new StringBuffer();
    char[] arr = chinese.toCharArray();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (int i = 0; i < arr.length; i++) {
    if (arr[i] > 128) {
    try {
    String[] _t = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
    if (_t != null) {
    pybf.append(_t[0].charAt(0));
    }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    } else {
    pybf.append(arr[i]);
    }
    }
    return pybf.toString().replaceAll("\W", "").trim();
    }

    /**
    * 获取汉字串拼音,英文字符不变
    *
    * @param chinese 汉字串
    * @return 汉语拼音
    */
    public static String cn2Spell(String chinese) {
    StringBuffer pybf = new StringBuffer();
    char[] arr = chinese.toCharArray();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (int i = 0; i < arr.length; i++) {
    if (arr[i] > 128) {
    try {
    pybf.append(PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat)[0]);
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    } else {
    pybf.append(arr[i]);
    }
    }
    return pybf.toString();
    }
    public static String cnToSpell(String chines) {

    String pinyinName = "";
    StringBuffer strbuf = new StringBuffer();
    char[] nameChar = chines.toCharArray();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (int i = 0; i < nameChar.length; i++) {
    char name = quanbianban(nameChar[i]);
    nameChar[i] = name;
    if (128 < nameChar[i]) {
    try {
    strbuf.append(PinyinHelper.toHanyuPinyinStringArray(nameChar[i], defaultFormat)[0].charAt(0));
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    } else {
    strbuf.append(nameChar[i]);
    }
    }

    pinyinName = strbuf.toString();

    return pinyinName;
    }

    public static char quanbianban(char quan) {
    switch (quan) {

    case '0':
    return '0';

    case '1':
    return '1';

    case '2':
    return '2';

    case '3':
    return '3';

    case '4':
    return '4';

    case '5':
    return '5';

    case '6':
    return '6';

    case '7':
    return '7';

    case '8':
    return '8';

    case '9':
    return '9';

    default:
    return quan;

    }
    }
    /**
    * 字符串集合转换字符串(逗号分隔)
    * @author wyh
    * @param stringSet
    * @return
    */
    public static String makeStringByStringSet(Set<String> stringSet){
    StringBuilder str = new StringBuilder();
    int i=0;
    for(String s : stringSet){
    if(i == stringSet.size() - 1){
    str.append(s);
    }else{
    str.append(s + ",");
    }
    i++;
    }
    return str.toString().toLowerCase();
    }

    /**
    * 获取拼音集合
    * @author wyh
    * @param src
    * @return Set<String>
    */
    public static Set<String> getPinyin(String src){
    if(src!=null && !src.trim().equalsIgnoreCase("")){
    char[] srcChar ;
    srcChar=src.toCharArray();
    //汉语拼音格式输出类
    HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat();

    //输出设置,大小写,音标方式等
    hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

    String[][] temp = new String[src.length()][];
    for(int i=0;i<srcChar.length;i++){
    char c = srcChar[i];
    //是中文或者a-z或者A-Z转换拼音(我的需求,是保留中文或者a-z或者A-Z)
    if(String.valueOf(c).matches("[\u4E00-\u9FA5]+")){
    try{
    temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i], hanYuPinOutputFormat);
    }catch(BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    }else if(((int)c>=65 && (int)c<=90) || ((int)c>=97 && (int)c<=122)){
    temp[i] = new String[]{String.valueOf(srcChar[i])};
    }else{
    temp[i] = new String[]{""};
    }
    }
    String[] pingyinArray = Exchange(temp);
    Set<String> pinyinSet = new HashSet<String>();
    for(int i=0;i<pingyinArray.length;i++){
    pinyinSet.add(pingyinArray[i]);
    }
    return pinyinSet;
    }
    return null;
    }

    /**
    * 递归
    * @author wyh
    * @param strJaggedArray
    * @return
    */
    public static String[] Exchange(String[][] strJaggedArray){
    String[][] temp = DoExchange(strJaggedArray);
    return temp[0];
    }

    /**
    * 递归
    * @author wyh
    * @param strJaggedArray
    * @return
    */
    private static String[][] DoExchange(String[][] strJaggedArray){
    int len = strJaggedArray.length;
    if(len >= 2){
    int len1 = strJaggedArray[0].length;
    int len2 = strJaggedArray[1].length;
    int newlen = len1*len2;
    String[] temp = new String[newlen];
    int Index = 0;
    for(int i=0;i<len1;i++){
    for(int j=0;j<len2;j++){
    temp[Index] = strJaggedArray[0][i] + strJaggedArray[1][j];
    Index ++;
    }
    }
    String[][] newArray = new String[len-1][];
    for(int i=2;i<len;i++){
    newArray[i-1] = strJaggedArray[i];
    }
    newArray[0] = temp;
    return DoExchange(newArray);
    }else{
    return strJaggedArray;
    }
    }

    public static void main(String[] args) throws Exception {
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

    // UPPERCASE:大写 (ZHONG)
    // LOWERCASE:小写 (zhong)
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);

    // WITHOUT_TONE:无音标 (zhong)
    // WITH_TONE_NUMBER:1-4数字表示英标 (zhong4)
    // WITH_TONE_MARK:直接用音标符(必须WITH_U_UNICODE否则异常) (zhòng)
    format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK);

    // WITH_V:用v表示ü (nv)
    // WITH_U_AND_COLON:用"u:"表示ü (nu:)
    // WITH_U_UNICODE:直接用ü (nü)
    format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);
    String[] pinyin = PinyinHelper.toHanyuPinyinStringArray('重', format);
    System.out.println(PinyinHelper.toHanyuPinyinStringArray('重', format)[1]);
    System.out.println(PingYingChange.cnToSpell("镇江abc"));
    System.out.println(PingYingChange.quanbianban('o'));
    String x = "嘅囧誰說壞學生來勼髮視頻裆児";
    System.out.println(cn2FirstSpell(x));
    System.out.println(cn2Spell(x));
    String str = "单田芳";
    System.out.println(makeStringByStringSet(getPinyin(str)));
    System.out.println("");
    }
    }

  • 相关阅读:
    Java基础——Instanceof 运算符
    算法——八皇后问题(eight queen puzzle)之回溯法求解
    浅析数据结构
    react給變量賦值并列元素
    如何使用npm构建一个react demo项目
    Java面试题 静态代码块 构造代码块 构造方法 的执行顺序
    Mysql 反向解析 导致远程访问慢
    Django admin管理工具
    Django-Ajax(85)
    jQuery快速入门
  • 原文地址:https://www.cnblogs.com/miaosj/p/6931639.html
Copyright © 2011-2022 走看看